
Commit 039638d

Authored by milancurcic and Vandenplas, Jeremie
Dropout layer (#194)
* First stab at dropout; conflict with base type TODO
* Partial dropout integration
* Test uninitialized dropout layer
* Test dropout state that follows an input layer
* Enable forward pass for dropout; backward pass TODO
* Version bump and add dropout to the features table
* Add dropout to CMake
* Enable preprocessing in fpm.toml (needed with recent versions of fpm)
* Small change in scale implementation
* Integration of backward pass for dropout
* Reduce tolerance in conv2d convergence tests
* Fix bug in dropout scaling (Co-authored-by: Ricardo Orsi <@ricor07>)
* Disable dropout in inference mode (net % predict); TODO enable in net % train
* Set dropout's training mode to true in net % train(); add tests
* WIP dropout tests
* Dropout layers always in training mode, except when `predict` is called, when they are in inference mode
* Update the layers table
* Ensure the actual dropout rate == requested dropout rate in most cases
* Accumulate the gradient in dropout % backward and flush in network % update
* Guard against bad dropout rate
* Connect the backward pass; expand tests
* Expand tests
* Use the reference scaling in dropout; don't accumulate gradients because it's not needed
* Add dropout to MNIST example; small model changes
* Add reference
* Update print_info dropout
* Update print_info
* Compute scale once in dropout constructor
* dropout % backward() doesn't need input from the previous layer
* Timing info of dropout

Co-authored-by: Vandenplas, Jeremie <[email protected]>
1 parent c316ee1 commit 039638d

16 files changed: +582 -34 lines

CMakeLists.txt (+2)

```diff
@@ -55,6 +55,8 @@ add_library(neural-fortran
   src/nf/nf_reshape_layer_submodule.f90
   src/nf/io/nf_io_binary.f90
   src/nf/io/nf_io_binary_submodule.f90
+  src/nf/nf_dropout_layer.f90
+  src/nf/nf_dropout_layer_submodule.f90
 )

 target_link_libraries(neural-fortran PRIVATE)
```

README.md (+3, -2)

```diff
@@ -30,11 +30,12 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass |
 |------------|------------------|------------------------|----------------------|--------------|---------------|
 | Input | `input` | n/a | 1, 2, 3 | n/a | n/a |
-| Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ |
+| Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ |
+| Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
 | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
-| Linear (2-d) | `linear2d` | `input2d` | 2 | ✅ | ✅ |
+| Linear (2-d) | `linear2d` | `input2d`, `linear2d` | 2 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |

 (*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset.
```
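To illustrate the new table rows, here is a minimal sketch, not part of this commit, that places a dropout layer directly after a 1-d input layer and in front of a dense layer; the layer sizes and the rate of 0.1 are arbitrary, and only constructors exported by `nf` in this commit are used.

```fortran
program dropout_after_input
  ! Hypothetical example; input1d -> dropout -> dense, matching the
  ! supported-input column in the table above.
  use nf, only: network, input, dropout, dense, softmax
  implicit none
  type(network) :: net

  net = network([ &
    input(100), &
    dropout(0.1), &        ! randomly disables ~10% of the 100 features during training
    dense(10, softmax()) &
  ])

end program dropout_after_input
```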

example/dense_mnist.f90 (+5, -4)

```diff
@@ -1,6 +1,6 @@
 program dense_mnist

-  use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr
+  use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr, relu, softmax, dropout

   implicit none

@@ -17,8 +17,9 @@ program dense_mnist

   net = network([ &
     input(784), &
-    dense(30), &
-    dense(10) &
+    dense(64, relu()), &
+    dropout(0.2), &
+    dense(10, softmax()) &
   ])
   num_epochs = 10

@@ -32,7 +33,7 @@ program dense_mnist
   call net % train( &
     training_images, &
     label_digits(training_labels), &
-    batch_size=100, &
+    batch_size=128, &
     epochs=1, &
     optimizer=sgd(learning_rate=3.) &
   )
```
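Per the commit notes, dropout layers are active only during `net % train()`; inside `net % predict()` they run in inference (pass-through) mode. A hedged sketch of what that implies for the model above, assuming the library's `predict` method returns the network output for a single sample:

```fortran
program dropout_predict_is_deterministic
  ! Hypothetical check, not part of the commit: with dropout in pass-through
  ! mode inside predict(), repeated predictions on the same input match exactly.
  use nf, only: network, input, dense, dropout, relu, softmax
  implicit none
  type(network) :: net
  real :: x(784)
  real, allocatable :: y1(:), y2(:)

  net = network([ &
    input(784), &
    dense(64, relu()), &
    dropout(0.2), &
    dense(10, softmax()) &
  ])

  call random_number(x)
  y1 = net % predict(x)
  y2 = net % predict(x)

  print *, 'Identical predictions:', all(y1 == y2)   ! expected: T

end program dropout_predict_is_deterministic
```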

src/nf.f90 (+1, -1)

```diff
@@ -3,7 +3,7 @@ module nf
   use nf_datasets_mnist, only: label_digits, load_mnist
   use nf_layer, only: layer
   use nf_layer_constructors, only: &
-    conv2d, dense, flatten, input, maxpool2d, reshape, linear2d
+    conv2d, dense, dropout, flatten, input, linear2d, maxpool2d, reshape
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network
```

src/nf/nf_dropout_layer.f90 (new file, +83)

```fortran
module nf_dropout_layer

  !! Dropout layer by Srivastava et al. (2014).
  !!
  !! Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I. and
  !! Salakhutdinov, R., 2014. Dropout: a simple way to prevent neural networks
  !! from overfitting. The Journal of Machine Learning Research, 15(1),
  !! pp.1929-1958.

  use nf_base_layer, only: base_layer

  implicit none

  private
  public :: dropout_layer

  type, extends(base_layer) :: dropout_layer
    !! Concrete implementation of a dropout layer type

    integer :: input_size = 0

    real, allocatable :: output(:)
    real, allocatable :: gradient(:)
    real, allocatable :: mask(:) ! binary mask for dropout

    real :: dropout_rate ! probability of dropping a neuron
    real :: scale ! scale factor to preserve the input sum
    logical :: training = .true. ! set to .false. for inference

  contains

    procedure :: backward
    procedure :: forward
    procedure :: init

  end type dropout_layer

  interface dropout_layer
    module function dropout_layer_cons(rate) result(res)
      !! This function returns the `dropout_layer` instance.
      real, intent(in) :: rate
        !! Dropout rate
      type(dropout_layer) :: res
        !! dropout_layer instance
    end function dropout_layer_cons
  end interface dropout_layer

  interface

    pure module subroutine backward(self, gradient)
      !! Apply the backward pass, computing the gradient with respect to
      !! this layer's input by propagating the incoming gradient through
      !! the dropout mask.
      class(dropout_layer), intent(in out) :: self
        !! Dropout layer instance
      real, intent(in) :: gradient(:)
        !! Gradient from the next layer
    end subroutine backward

    module subroutine forward(self, input)
      !! Propagate forward the layer.
      !! Calling this subroutine updates the values of a few data components
      !! of `dropout_layer` that are needed for the backward pass.
      class(dropout_layer), intent(in out) :: self
        !! Dropout layer instance
      real, intent(in) :: input(:)
        !! Input from the previous layer
    end subroutine forward

    module subroutine init(self, input_shape)
      !! Initialize the layer data structures.
      !!
      !! This is a deferred procedure from the `base_layer` abstract type.
      class(dropout_layer), intent(in out) :: self
        !! Dropout layer instance
      integer, intent(in) :: input_shape(:)
        !! Shape of the input layer
    end subroutine init

  end interface

end module nf_dropout_layer
```
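A short sketch, not part of the commit and using only the public interface declared above, of how the concrete `dropout_layer` type behaves when driven directly, outside the `network` wrapper; the array size of 10 and the rate of 0.2 are arbitrary:

```fortran
program dropout_layer_demo
  use nf_dropout_layer, only: dropout_layer
  implicit none
  type(dropout_layer) :: drop
  real :: x(10)

  drop = dropout_layer(rate=0.2)   ! scale = 1 / (1 - 0.2) = 1.25
  call drop % init([10])

  call random_number(x)

  ! Training mode (the default): a random mask zeroes some elements and the
  ! surviving ones are scaled by 1.25.
  call drop % forward(x)
  print *, 'Training output:   ', drop % output

  ! Inference mode: the input passes through unchanged.
  drop % training = .false.
  call drop % forward(x)
  print *, 'Pass-through check:', all(drop % output == x)

end program dropout_layer_demo
```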

src/nf/nf_dropout_layer_submodule.f90 (new file, +68)

```fortran
submodule (nf_dropout_layer) nf_dropout_layer_submodule
  !! This submodule implements the procedures defined in the
  !! nf_dropout_layer module.

  use nf_random, only: shuffle

contains

  module function dropout_layer_cons(rate) result(res)
    real, intent(in) :: rate
    type(dropout_layer) :: res
    res % dropout_rate = rate
    res % scale = 1 / (1 - rate)
  end function dropout_layer_cons


  module subroutine init(self, input_shape)
    class(dropout_layer), intent(in out) :: self
    integer, intent(in) :: input_shape(:)

    self % input_size = input_shape(1)

    ! Allocate arrays
    allocate(self % output(self % input_size))
    allocate(self % gradient(self % input_size))
    allocate(self % mask(self % input_size))

    ! Initialize arrays
    self % output = 0
    self % gradient = 0
    self % mask = 1 ! Default mask is all ones (no dropout)

  end subroutine init


  module subroutine forward(self, input)
    class(dropout_layer), intent(in out) :: self
    real, intent(in) :: input(:)

    ! Generate a random mask for dropout; training mode only
    if (self % training) then

      ! Set the first int(size(input) * dropout_rate) elements to 0, the rest
      ! to 1, and shuffle. Note that the selection of elements rounds down to
      ! the nearest integer, so in cases where size(input) * dropout_rate is
      ! not an integer, the actual dropout rate will be slightly lower.
      self % mask = 1
      self % mask(:int(size(self % mask) * self % dropout_rate)) = 0
      call shuffle(self % mask)

      ! Apply the dropout mask
      self % output = input * self % mask * self % scale

    else
      ! In inference mode, don't apply dropout; simply pass through the input
      self % output = input

    end if

  end subroutine forward


  pure module subroutine backward(self, gradient)
    class(dropout_layer), intent(in out) :: self
    real, intent(in) :: gradient(:)
    self % gradient = gradient * self % mask * self % scale
  end subroutine backward

end submodule nf_dropout_layer_submodule
```
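The forward pass above zeroes exactly `int(size(input) * dropout_rate)` elements and multiplies the survivors by `scale = 1 / (1 - rate)`, so the expected sum of the activations is preserved. A small hypothetical sketch, under the same assumptions as the module above, that checks this arithmetic for a 10-element input of ones and `rate = 0.2`, where 2 elements are dropped and the kept ones become 1.25:

```fortran
program dropout_scale_check
  use nf_dropout_layer, only: dropout_layer
  implicit none
  type(dropout_layer) :: drop
  real :: x(10)

  drop = dropout_layer(rate=0.2)
  call drop % init([10])

  x = 1.
  call drop % forward(x)

  print *, 'Dropped elements:', count(drop % output == 0)   ! int(10 * 0.2) = 2
  print *, 'Kept value      :', maxval(drop % output)       ! 1 / (1 - 0.2) = 1.25
  print *, 'Sum of outputs  :', sum(drop % output)          ! 8 * 1.25 = 10 = sum(x)

end program dropout_scale_check
```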

src/nf/nf_layer.f90 (+1, -1)

```diff
@@ -91,7 +91,7 @@ end subroutine backward_3d

   interface

-    pure module subroutine forward(self, input)
+    module subroutine forward(self, input)
       !! Apply a forward pass on the layer.
       !! This changes the internal state of the layer.
       !! This is normally called internally by the `network % forward`
```

src/nf/nf_layer_constructors.f90 (+19, -1)

````diff
@@ -8,7 +8,7 @@ module nf_layer_constructors
   implicit none

   private
-  public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d
+  public :: conv2d, dense, dropout, flatten, input, linear2d, maxpool2d, reshape

   interface input

@@ -104,6 +104,24 @@ module function dense(layer_size, activation) result(res)
         !! Resulting layer instance
     end function dense

+    module function dropout(rate) result(res)
+      !! Create a dropout layer with a given dropout rate.
+      !!
+      !! This layer is for randomly disabling neurons during training.
+      !!
+      !! Example:
+      !!
+      !! ```
+      !! use nf, only: dropout, layer
+      !! type(layer) :: dropout_layer
+      !! dropout_layer = dropout(rate=0.5)
+      !! ```
+      real, intent(in) :: rate
+        !! Dropout rate - fraction of neurons to randomly disable during training
+      type(layer) :: res
+        !! Resulting layer instance
+    end function dropout
+
     module function flatten() result(res)
       !! Flatten (3-d -> 1-d) layer constructor.
       !!
````

src/nf/nf_layer_constructors_submodule.f90 (+11, -1)

```diff
@@ -3,6 +3,7 @@
   use nf_layer, only: layer
   use nf_conv2d_layer, only: conv2d_layer
   use nf_dense_layer, only: dense_layer
+  use nf_dropout_layer, only: dropout_layer
   use nf_flatten_layer, only: flatten_layer
   use nf_input1d_layer, only: input1d_layer
   use nf_input2d_layer, only: input2d_layer

@@ -65,14 +66,23 @@ module function dense(layer_size, activation) result(res)
   end function dense


+  module function dropout(rate) result(res)
+    real, intent(in) :: rate
+    type(layer) :: res
+    if (rate < 0 .or. rate > 1) &
+      error stop 'rate must be between 0 and 1 in a dropout layer'
+    res % name = 'dropout'
+    allocate(res % p, source=dropout_layer(rate))
+  end function dropout
+
+
   module function flatten() result(res)
     type(layer) :: res
     res % name = 'flatten'
     allocate(res % p, source=flatten_layer())
   end function flatten


-
   module function input1d(layer_size) result(res)
     integer, intent(in) :: layer_size
     type(layer) :: res
```
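The guard added to the constructor makes an out-of-range rate fail at network construction time. A tiny hypothetical sketch, not part of the commit, of what tripping it looks like:

```fortran
program dropout_rate_guard
  use nf, only: dropout, layer
  implicit none
  type(layer) :: bad

  ! Stops execution with: rate must be between 0 and 1 in a dropout layer
  bad = dropout(rate=1.5)

end program dropout_rate_guard
```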
