
Commit 8e923a0

Resolve conflicts with main
2 parents d4a87e2 + e628d1e commit 8e923a0

16 files changed: +1034 -27 lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
@@ -39,12 +39,16 @@ add_library(neural-fortran
   src/nf/nf_input3d_layer_submodule.f90
   src/nf/nf_layer_constructors.f90
   src/nf/nf_layer_constructors_submodule.f90
+  src/nf/nf_layernorm.f90
+  src/nf/nf_layernorm_submodule.f90
   src/nf/nf_layer.f90
   src/nf/nf_layer_submodule.f90
   src/nf/nf_locally_connected_1d_submodule.f90
   src/nf/nf_locally_connected_1d.f90
   src/nf/nf_linear2d_layer.f90
   src/nf/nf_linear2d_layer_submodule.f90
+  src/nf/nf_embedding_layer.f90
+  src/nf/nf_embedding_layer_submodule.f90
   src/nf/nf_loss.f90
   src/nf/nf_loss_submodule.f90
   src/nf/nf_maxpool1d_layer.f90

README.md

Lines changed: 4 additions & 2 deletions
@@ -30,12 +30,14 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass |
 |------------|------------------|------------------------|----------------------|--------------|---------------|
 | Input | `input` | n/a | 1, 2, 3 | n/a | n/a |
+| Embedding | `embedding` | n/a | 2 | ✅ | ✅ |
 | Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ |
 | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
-| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
-| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ |
 | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |
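
As a usage illustration of where the new `layernorm` constructor fits, here is a minimal sketch consistent with the supported-input columns above, assuming the two-argument `input` form that produces the rank-2 `input2d` layer listed in the table; the hyperparameters (sequence length 16, model dimension 64, 4 attention heads) are made up, and the program only builds the network:

  program layernorm_placement_sketch
    ! Sketch only: layernorm placed after self_attention and before linear2d,
    ! per the "Supported input layers" column in the table above.
    use nf, only: network, input, linear2d, self_attention, layernorm
    implicit none
    type(network) :: net

    net = network([ &
      input(16, 64), &       ! rank-2 input: (sequence_length, model_dimension)
      self_attention(4), &   ! self-attention with 4 heads
      layernorm(), &         ! layer normalization over the model dimension
      linear2d(64) &         ! linear projection back to 64 features
    ])
  end program layernorm_placement_sketch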

fpm.toml

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.19.0"
+version = "0.20.0"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "[email protected]"

src/nf.f90

Lines changed: 15 additions & 2 deletions
@@ -3,8 +3,21 @@ module nf
   use nf_datasets_mnist, only: label_digits, load_mnist
   use nf_layer, only: layer
   use nf_layer_constructors, only: &
-    conv1d, conv2d, dense, dropout, flatten, input, linear2d, locally_connected_1d, &
-    maxpool1d, maxpool2d, reshape, reshape2d, self_attention
+    conv1d, &
+    conv2d, &
+    dense, &
+    dropout, &
+    embedding, &
+    flatten, &
+    input, &
+    layernorm, &
+    linear2d, &
+    locally_connected_1d, &
+    maxpool1d, &
+    maxpool2d, &
+    reshape, &
+    reshape2d, &
+    self_attention
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network

src/nf/nf_embedding_layer.f90

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+module nf_embedding_layer
+
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+
+  implicit none
+
+  private
+  public :: embedding_layer
+
+  type, extends(base_layer) :: embedding_layer
+    !! Embedding Layer
+    !! Stores inputs as a trainable lookup table. Inputs are
+    !! integer indices in a dictionary of `vocab_size`.
+    !! This layer converts them into a table of shape
+    !! (`sequence_length`, `model_dimension`)
+    integer :: sequence_length, vocab_size, model_dimension
+    integer :: positional
+
+    real, allocatable :: weights(:, :)
+    real, allocatable :: output(:, :)
+    real, allocatable :: dw(:, :) ! weight gradients
+
+  contains
+
+    procedure :: backward
+    procedure :: forward
+    procedure :: positional_trigonometric
+    procedure :: positional_absolute
+    procedure :: init
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: get_gradients
+    procedure :: set_params
+
+  end type embedding_layer
+
+  interface embedding_layer
+    module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res)
+      integer, intent(in) :: vocab_size, model_dimension
+      integer, optional :: positional
+      type(embedding_layer) :: res
+    end function embedding_layer_cons
+  end interface embedding_layer
+
+  interface
+    pure module subroutine forward(self, input)
+      !! Get vectors by indices in the dictionary
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: input(:)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      !! Update gradients at the `input` indices:
+      !! dw_i = dw_i + d_output_i
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: input(:)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    pure module subroutine positional_trigonometric(self, pos)
+      !! Sum embedding with positional info (trigonometric, not trainable)
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: pos
+    end subroutine positional_trigonometric
+
+    pure module subroutine positional_absolute(self, pos)
+      !! Sum embedding with absolute position
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: pos
+    end subroutine positional_absolute
+
+    module subroutine init(self, input_shape)
+      class(embedding_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+
+    pure module function get_num_params(self) result(num_params)
+      class(embedding_layer), intent(in) :: self
+      integer :: num_params
+    end function get_num_params
+
+    module function get_params(self) result(params)
+      class(embedding_layer), intent(in), target :: self
+      real, allocatable :: params(:)
+    end function get_params
+
+    module function get_gradients(self) result(gradients)
+      class(embedding_layer), intent(in), target :: self
+      real, allocatable :: gradients(:)
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      class(embedding_layer), intent(in out) :: self
+      real, intent(in), target :: params(:)
+    end subroutine set_params
+  end interface
+end module nf_embedding_layer
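
To make the lookup-table behavior concrete, here is a small sketch that drives the bare `embedding_layer` type directly, outside the `network` wrapper; the vocabulary size, embedding dimension, and token indices are made up for illustration:

  program embedding_lookup_demo
    use nf_embedding_layer, only: embedding_layer
    implicit none
    type(embedding_layer) :: emb
    integer :: tokens(4)

    ! A vocabulary of 10 tokens, each mapped to a 3-dimensional vector;
    ! positional encoding is left at its default (none).
    emb = embedding_layer(vocab_size=10, model_dimension=3)
    call emb % init([4])          ! sets sequence_length = 4 and allocates weights

    tokens = [3, 7, 7, 1]         ! arbitrary token indices into the vocabulary
    call emb % forward(tokens)    ! each row of output is the looked-up vector

    print *, shape(emb % output)  ! expected: 4 3
  end program embedding_lookup_demo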

src/nf/nf_embedding_layer_submodule.f90

Lines changed: 137 additions & 0 deletions

@@ -0,0 +1,137 @@
+#define NONE 0
+#define TRIGONOMETRIC 1
+#define ABSOLUTE 2
+
+submodule(nf_embedding_layer) nf_embedding_layer_submodule
+  use nf_base_layer, only: base_layer
+  implicit none
+contains
+  module function embedding_layer_cons(vocab_size, model_dimension, positional) result(res)
+    integer, intent(in) :: vocab_size, model_dimension
+    integer, optional :: positional
+    type(embedding_layer) :: res
+
+    res % vocab_size = vocab_size
+    res % model_dimension = model_dimension
+    if (.not. present(positional)) then
+      res % positional = NONE
+    else
+      res % positional = positional
+    end if
+  end function embedding_layer_cons
+
+  module subroutine init(self, input_shape)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: input_shape(:)
+
+    self % sequence_length = input_shape(1)
+
+    allocate(self % output(self % sequence_length, self % model_dimension))
+
+    allocate(self % weights(self % vocab_size, self % model_dimension))
+    self % weights = 0.1
+
+    allocate(self % dw(self % vocab_size, self % model_dimension))
+    self % dw = 0.0
+  end subroutine init
+
+  pure module subroutine forward(self, input)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: input(:)
+    integer :: i, index
+
+    do concurrent(i = 1: self % sequence_length)
+      index = input(i)
+      if (index > size(self % weights, 1)) then
+        index = 1
+      elseif (index == 0) then
+        index = 1
+      end if
+
+      self % output(i, :) = self % weights(index, :)
+
+      if (self % positional == TRIGONOMETRIC) then
+        call self % positional_trigonometric(i)
+      elseif (self % positional == ABSOLUTE) then
+        call self % positional_absolute(i)
+      end if
+    end do
+  end subroutine forward
+
+  pure module subroutine backward(self, input, gradient)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: input(:)
+    real, intent(in) :: gradient(:, :)
+    integer :: i
+
+    do concurrent(i = 1: self % sequence_length)
+      self % dw(input(i), :) = self % dw(input(i), :) + gradient(i, :)
+    end do
+  end subroutine backward
+
+  pure module subroutine positional_trigonometric(self, pos)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: pos
+    integer :: i
+    real :: theta
+
+    do concurrent(i = 1: floor(real(self % model_dimension) / 2))
+      theta = (pos - 1) / 10000 ** (real(2 * (i-1)) / self % model_dimension)
+      self % output(pos, 2 * i - 1) = self % output(pos, 2 * i - 1) + sin(theta)
+      self % output(pos, 2 * i) = self % output(pos, 2 * i) + cos(theta)
+    end do
+  end subroutine positional_trigonometric
+
+  pure module subroutine positional_absolute(self, pos)
+    class(embedding_layer), intent(in out) :: self
+    integer, intent(in) :: pos
+    integer :: i
+
+    do concurrent(i = 1: self % model_dimension)
+      self % output(pos, i) = self % output(pos, i) + pos - 1
+    end do
+  end subroutine positional_absolute
+
+  pure module function get_num_params(self) result(num_params)
+    class(embedding_layer), intent(in) :: self
+    integer :: num_params
+    num_params = self % vocab_size * self % model_dimension
+  end function get_num_params
+
+  module function get_params(self) result(params)
+    class(embedding_layer), intent(in), target :: self
+    real, allocatable :: params(:)
+    real, pointer :: w_(:) => null()
+
+    w_(1: product(shape(self % weights))) => self % weights
+    params = w_
+  end function get_params
+
+  module function get_gradients(self) result(gradients)
+    class(embedding_layer), intent(in), target :: self
+    real, allocatable :: gradients(:)
+    real, pointer :: dw_(:) => null()
+
+    dw_(1: product(shape(self % dw))) => self % dw
+    gradients = dw_
+  end function get_gradients
+
+  module subroutine set_params(self, params)
+    class(embedding_layer), intent(in out) :: self
+    real, intent(in), target :: params(:)
+
+    real, pointer :: p_(:,:) => null()
+
+    ! check if the number of parameters is correct
+    if (size(params) /= self % get_num_params()) then
+      error stop 'Error: number of parameters does not match'
+    end if
+
+    associate(n => self % vocab_size * self % model_dimension)
+      ! reshape the weights
+      p_(1:self % vocab_size, 1:self % model_dimension) => params(1 : n)
+      self % weights = p_
+    end associate
+
+  end subroutine set_params
+end submodule nf_embedding_layer_submodule
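
In equation form, the trigonometric branch above is the usual sinusoidal positional encoding, written with the zero-based position $p = \mathrm{pos} - 1$ and the pair index $i = 1, \dots, \lfloor d_\mathrm{model}/2 \rfloor$ used in the code:

$$
\theta_{p,i} = \frac{p}{10000^{\,2(i-1)/d_\mathrm{model}}}, \qquad
\mathrm{output}(\mathrm{pos},\, 2i-1) \mathrel{+}= \sin\theta_{p,i}, \qquad
\mathrm{output}(\mathrm{pos},\, 2i) \mathrel{+}= \cos\theta_{p,i}.
$$

The absolute variant simply adds the zero-based position $p$ to every component of the embedding at that position.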

src/nf/nf_layer_constructors.f90

Lines changed: 42 additions & 12 deletions
@@ -14,11 +14,16 @@ module nf_layer_constructors
     dense, &
     dropout, &
     flatten, &
-    input, locally_connected_1d, maxpool1d, &
+    input, &
     linear2d, &
+    locally_connected_1d, &
+    maxpool1d, &
     maxpool2d, &
-    reshape, reshape2d, &
-    self_attention
+    reshape, &
+    reshape2d, &
+    self_attention, &
+    embedding, &
+    layernorm
 
   interface input
 
@@ -310,15 +315,40 @@ module function linear2d(out_features) result(res)
       !! Resulting layer instance
     end function linear2d
 
-    module function self_attention(num_heads) result(res)
-      !! Rank-2 (sequence_length, out_features) self attention constructor.
-      !! sequence_length and model_dimension are determined at layer initialization, based on the
-      !! output shape of the previous layer.
-      integer, intent(in) :: num_heads
-      !! Number of attention heads
-      type(layer) :: res
-      !! Resulting layer instance
-    end function self_attention
+    module function self_attention(num_heads) result(res)
+      !! Rank-2 (sequence_length, out_features) self attention constructor.
+      !! sequence_length and model_dimension are determined at layer initialization, based on the
+      !! output shape of the previous layer.
+      integer, intent(in) :: num_heads
+      !! Number of attention heads
+      type(layer) :: res
+      !! Resulting layer instance
+    end function self_attention
+
+    module function embedding(sequence_length, vocab_size, model_dimension, positional) result(res)
+      !! Embedding layer constructor.
+      !!
+      !! This layer inputs token indices from the dictionary into the network.
+      !! It works as a trainable lookup table that converts each index into a vector.
+      !! The embedding layer must be the first layer in a network.
+      integer, intent(in) :: sequence_length
+      !! Maximum length of the input sequence
+      integer, intent(in) :: vocab_size
+      !! Size of the token vocabulary
+      integer, intent(in) :: model_dimension
+      !! Size of the target embeddings
+      integer, optional, intent(in) :: positional
+      !! Type of positional encoding to apply
+      type(layer) :: res
+    end function embedding
+
+    module function layernorm() result(res)
+      !! Layer normalization:
+      !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+      !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+      !! https://arxiv.org/abs/1607.06450v1
+      type(layer) :: res
+    end function layernorm
 
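
As a final usage note, here is a hedged sketch of calling the two new constructors at the network level; the integer code 1 for `positional` mirrors the TRIGONOMETRIC value defined in nf_embedding_layer_submodule.f90, and wiring `self_attention` directly after `embedding` is an assumption based on the constructor docstring rather than something spelled out in the README table:

  program embedding_constructor_sketch
    use nf, only: network, embedding, self_attention, layernorm
    implicit none
    type(network) :: net

    ! Sequences of 32 token indices over a 5000-word vocabulary, mapped to
    ! 128-dimensional embeddings with trigonometric positional encoding.
    net = network([ &
      embedding(32, 5000, 128, positional=1), &
      self_attention(8), &
      layernorm() &
    ])
  end program embedding_constructor_sketch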