
Commit 86d916b

Merge commit (2 parents: e2bba3c + 1db0258)

33 files changed: +2389 -203 lines

CMakeLists.txt (+12 -2)

@@ -24,6 +24,8 @@ add_library(neural-fortran
   src/nf/nf_avgpool3d_layer.f90
   src/nf/nf_avgpool3d_layer_submodule.f90
   src/nf/nf_base_layer.f90
+  src/nf/nf_conv1d_layer.f90
+  src/nf/nf_conv1d_layer_submodule.f90
   src/nf/nf_conv2d_layer.f90
   src/nf/nf_conv2d_layer_submodule.f90
   src/nf/nf_cross_attention_layer.f90
@@ -43,12 +45,18 @@ add_library(neural-fortran
   src/nf/nf_input3d_layer_submodule.f90
   src/nf/nf_layer_constructors.f90
   src/nf/nf_layer_constructors_submodule.f90
+  src/nf/nf_layernorm.f90
+  src/nf/nf_layernorm_submodule.f90
   src/nf/nf_layer.f90
   src/nf/nf_layer_submodule.f90
   src/nf/nf_locally_connected_1d_submodule.f90
   src/nf/nf_locally_connected_1d.f90
+  src/nf/nf_locally_connected1d_layer_submodule.f90
+  src/nf/nf_locally_connected1d_layer.f90
   src/nf/nf_linear2d_layer.f90
   src/nf/nf_linear2d_layer_submodule.f90
+  src/nf/nf_embedding_layer.f90
+  src/nf/nf_embedding_layer_submodule.f90
   src/nf/nf_loss.f90
   src/nf/nf_loss_submodule.f90
   src/nf/nf_maxpool1d_layer.f90
@@ -66,8 +74,10 @@ add_library(neural-fortran
   src/nf/nf_parallel.f90
   src/nf/nf_parallel_submodule.f90
   src/nf/nf_random.f90
-  src/nf/nf_reshape_layer.f90
-  src/nf/nf_reshape_layer_submodule.f90
+  src/nf/nf_reshape2d_layer.f90
+  src/nf/nf_reshape2d_layer_submodule.f90
+  src/nf/nf_reshape3d_layer.f90
+  src/nf/nf_reshape3d_layer_submodule.f90
   src/nf/nf_reshape2d_layer.f90
   src/nf/nf_reshape2d_layer_submodule.f90
   src/nf/nf_self_attention_layer.f90

README.md (+16 -9)

@@ -15,8 +15,8 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 
 ## Features
 
-* Training and inference of dense (fully connected) and convolutional neural
-  networks
+* Training and inference of dense (fully connected), convolutional (1-d and 2-d),
+  and transformer neural networks
 * Stochastic gradient descent optimizers: Classic, momentum, Nesterov momentum,
   RMSProp, Adagrad, Adam, AdamW
 * More than a dozen activation functions and their derivatives
@@ -30,16 +30,19 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass |
 |------------|------------------|------------------------|----------------------|--------------|---------------|
 | Input | `input` | n/a | 1, 2, 3 | n/a | n/a |
+| Embedding | `embedding` | n/a | 2 | ✅ | ✅ |
 | Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ |
 | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
-| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
+| Locally connected (1-d) | `locally_connected1d` | `input2d`, `locally_connected1d`, `conv1d`, `maxpool1d`, `reshape2d` | 2 | ✅ | ✅ |
+| Convolutional (1-d) | `conv1d` | `input2d`, `conv1d`, `maxpool1d`, `reshape2d` | 2 | ✅ | ✅ |
+| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
+| Max-pooling (1-d) | `maxpool1d` | `input2d`, `conv1d`, `maxpool1d`, `reshape2d` | 2 | ✅ | ✅ |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
-| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
-| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
-| Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
-| Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |
-
-(*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset.
+| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Flatten | `flatten` | `input2d`, `input3d`, `conv1d`, `conv2d`, `maxpool1d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
+| Reshape (1-d to 2-d or 3-d) | `reshape` | `dense`, `dropout`, `flatten`, `input1d` | 2, 3 | ✅ | ✅ |
 
 ## Getting started
 
@@ -259,13 +262,17 @@ It may be useful to read if you want to contribute a new feature to neural-fortran.
 
 Thanks to all open-source contributors to neural-fortran:
 [awvwgk](https://github.com/awvwgk),
+[certik](https://github.com/certik),
 [ggoyman](https://github.com/ggoyman),
 [ivan-pi](https://github.com/ivan-pi),
 [jacobwilliams](https://github.com/jacobwilliams),
 [jvdp1](https://github.com/jvdp1),
 [jvo203](https://github.com/jvo203),
+[mathomp4](https://github.com/mathomp4),
 [milancurcic](https://github.com/milancurcic),
+[OneAdder](https://github.com/OneAdder),
 [pirpyn](https://github.com/pirpyn),
+[ricor07](https://github.com/ricor07),
 [rouson](https://github.com/rouson),
 [rweed](https://github.com/rweed),
 [Spnetic-5](https://github.com/Spnetic-5),
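Taken together, the updated table implies pipelines like the one below. This is a minimal sketch assembled only from constructors exercised elsewhere in this commit (input, reshape, conv1d, maxpool1d, flatten, dense); it follows the table's supported-input rules, and any detail not shown in this diff should be treated as an assumption rather than the library's documented behavior.

program conv1d_pipeline_sketch
  ! Sketch of a rank-2 pipeline permitted by the updated table:
  ! input1d -> reshape (rank 2) -> conv1d -> maxpool1d -> flatten -> dense.
  use nf, only: network, input, reshape, conv1d, maxpool1d, flatten, dense, relu, softmax
  implicit none
  type(network) :: net
  net = network([ &
    input(784), &                                            ! rank-1 input
    reshape(28, 28), &                                       ! rank-1 to rank-2
    conv1d(filters=8, kernel_size=3, activation=relu()), &   ! rank-2 in, rank-2 out
    maxpool1d(pool_size=2), &
    flatten(), &                                             ! back to rank 1 for dense
    dense(10, activation=softmax()) &
  ])
end program conv1d_pipeline_sketch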

example/cnn_mnist.f90 (+4 -4)

@@ -12,15 +12,15 @@ program cnn_mnist
   real, allocatable :: validation_images(:,:), validation_labels(:)
   real, allocatable :: testing_images(:,:), testing_labels(:)
   integer :: n
-  integer, parameter :: num_epochs = 10
+  integer, parameter :: num_epochs = 250
 
   call load_mnist(training_images, training_labels, &
                   validation_images, validation_labels, &
                   testing_images, testing_labels)
 
   net = network([ &
     input(784), &
-    reshape([1,28,28]), &
+    reshape(1, 28, 28), &
     conv2d(filters=8, kernel_size=3, activation=relu()), &
     maxpool2d(pool_size=2), &
     conv2d(filters=16, kernel_size=3, activation=relu()), &
@@ -35,9 +35,9 @@ program cnn_mnist
   call net % train( &
     training_images, &
     label_digits(training_labels), &
-    batch_size=128, &
+    batch_size=16, &
     epochs=1, &
-    optimizer=sgd(learning_rate=3.) &
+    optimizer=sgd(learning_rate=0.001) &
   )
 
   print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( &
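Note the reshape constructor change above: output dimensions are now passed as separate arguments rather than as an array, and the output rank follows the number of arguments (2-d or 3-d, per the updated README table), subsuming the old reshape2d. A hedged sketch of both call forms, using only constructors that appear in this commit:

program reshape_api_sketch
  ! Both calls mirror this commit's examples; the output rank is
  ! implied by the number of dimension arguments.
  use nf, only: network, input, reshape
  implicit none
  type(network) :: to_rank3, to_rank2
  to_rank3 = network([input(784), reshape(1, 28, 28)])  ! rank 3; was reshape([1,28,28])
  to_rank2 = network([input(784), reshape(28, 28)])     ! rank 2; was reshape2d([28,28])
end program reshape_api_sketch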

example/cnn_mnist_1d.f90 (+8 -8)

@@ -1,7 +1,7 @@
-program cnn_mnist
+program cnn_mnist_1d
 
   use nf, only: network, sgd, &
-    input, conv2d, maxpool1d, maxpool2d, flatten, dense, reshape, reshape2d, locally_connected_1d, &
+    input, conv1d, maxpool1d, flatten, dense, reshape, locally_connected1d, &
     load_mnist, label_digits, softmax, relu
 
   implicit none
@@ -12,18 +12,18 @@ program cnn_mnist
   real, allocatable :: validation_images(:,:), validation_labels(:)
   real, allocatable :: testing_images(:,:), testing_labels(:)
   integer :: n
-  integer, parameter :: num_epochs = 10
+  integer, parameter :: num_epochs = 250
 
   call load_mnist(training_images, training_labels, &
                   validation_images, validation_labels, &
                   testing_images, testing_labels)
 
   net = network([ &
     input(784), &
-    reshape2d([28,28]), &
-    locally_connected_1d(filters=8, kernel_size=3, activation=relu()), &
+    reshape(28, 28), &
+    locally_connected1d(filters=8, kernel_size=3, activation=relu()), &
     maxpool1d(pool_size=2), &
-    locally_connected_1d(filters=16, kernel_size=3, activation=relu()), &
+    locally_connected1d(filters=16, kernel_size=3, activation=relu()), &
     maxpool1d(pool_size=2), &
     dense(10, activation=softmax()) &
   ])
@@ -37,7 +37,7 @@ program cnn_mnist
     label_digits(training_labels), &
     batch_size=16, &
     epochs=1, &
-    optimizer=sgd(learning_rate=0.003) &
+    optimizer=sgd(learning_rate=0.01) &
   )
 
   print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( &
@@ -63,5 +63,5 @@ real function accuracy(net, x, y)
   accuracy = real(good) / size(x, dim=2)
 end function accuracy
 
-end program cnn_mnist
+end program cnn_mnist_1d
 
fpm.toml (+1 -1)

@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.19.0"
+version = "0.21.0"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "[email protected]"

src/nf.f90 (+5)

@@ -4,12 +4,17 @@ module nf
   use nf_layer, only: layer
   use nf_layer_constructors, only: &
     conv2d, dense, flatten, input, maxpool1d, maxpool2d, reshape, reshape2d, locally_connected_1d, &
+    conv1d, &
     conv2d, &
     dense, &
     dropout, &
+    embedding, &
     flatten, &
     input, &
+    layernorm, &
     linear2d, &
+    locally_connected1d, &
+    maxpool1d, &
     maxpool2d, &
     reshape, &
     self_attention

src/nf/nf_conv1d_layer.f90 (new file, +119)

@@ -0,0 +1,119 @@
+module nf_conv1d_layer
+  !! This module provides a 1-d convolutional `conv1d` type.
+
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+  implicit none
+
+  private
+  public :: conv1d_layer
+
+  type, extends(base_layer) :: conv1d_layer
+
+    integer :: width
+    integer :: height
+    integer :: channels
+    integer :: kernel_size
+    integer :: filters
+
+    real, allocatable :: biases(:) ! size(filters)
+    real, allocatable :: kernel(:,:,:) ! filters x channels x window
+    real, allocatable :: output(:,:) ! filters x output_width
+    real, allocatable :: z(:,:) ! kernel .dot. input + bias
+
+    real, allocatable :: dw(:,:,:) ! weight (kernel) gradients
+    real, allocatable :: db(:) ! bias gradients
+    real, allocatable :: gradient(:,:)
+
+    class(activation_function), allocatable :: activation
+
+  contains
+
+    procedure :: forward
+    procedure :: backward
+    procedure :: get_gradients
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: init
+    procedure :: set_params
+
+  end type conv1d_layer
+
+  interface conv1d_layer
+    module function conv1d_layer_cons(filters, kernel_size, activation) &
+      result(res)
+      !! `conv1d_layer` constructor function
+      integer, intent(in) :: filters
+      integer, intent(in) :: kernel_size
+      class(activation_function), intent(in) :: activation
+      type(conv1d_layer) :: res
+    end function conv1d_layer_cons
+  end interface conv1d_layer
+
+  interface
+
+    module subroutine init(self, input_shape)
+      !! Initialize the layer data structures.
+      !!
+      !! This is a deferred procedure from the `base_layer` abstract type.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      integer, intent(in) :: input_shape(:)
+        !! Input layer dimensions
+    end subroutine init
+
+    pure module subroutine forward(self, input)
+      !! Apply a forward pass on the `conv1d` layer.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      real, intent(in) :: input(:,:)
+        !! Input data
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      !! Apply a backward pass on the `conv1d` layer.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      real, intent(in) :: input(:,:)
+        !! Input data (previous layer)
+      real, intent(in) :: gradient(:,:)
+        !! Gradient (next layer)
+    end subroutine backward
+
+    pure module function get_num_params(self) result(num_params)
+      !! Get the number of parameters in the layer.
+      class(conv1d_layer), intent(in) :: self
+        !! A `conv1d_layer` instance
+      integer :: num_params
+        !! Number of parameters
+    end function get_num_params
+
+    module function get_params(self) result(params)
+      !! Return the parameters (weights and biases) of this layer.
+      !! The parameters are ordered as weights first, biases second.
+      class(conv1d_layer), intent(in), target :: self
+        !! A `conv1d_layer` instance
+      real, allocatable :: params(:)
+        !! Parameters to get
+    end function get_params
+
+    module function get_gradients(self) result(gradients)
+      !! Return the gradients of this layer.
+      !! The gradients are ordered as weights first, biases second.
+      class(conv1d_layer), intent(in), target :: self
+        !! A `conv1d_layer` instance
+      real, allocatable :: gradients(:)
+        !! Gradients to get
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      !! Set the parameters of the layer.
+      class(conv1d_layer), intent(in out) :: self
+        !! A `conv1d_layer` instance
+      real, intent(in) :: params(:)
+        !! Parameters to set
+    end subroutine set_params
+
+  end interface
+
+end module nf_conv1d_layer
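For orientation, the shapes declared above (kernel as filters x channels x window, output as filters x output_width) imply a forward pass like the standalone sketch below. This is not the library's implementation; stride 1 and no padding ("valid" convolution) are assumptions here, and per the get_params docstring the flattened parameter vector would order all kernel weights before the biases.

program conv1d_forward_sketch
  ! Standalone sketch of the computation implied by conv1d_layer's
  ! declarations: z = kernel .dot. input + bias at each window position.
  ! Assumptions: stride 1, no padding, so output_width = width - window + 1.
  implicit none
  integer, parameter :: filters = 2, channels = 3, window = 3, width = 8
  real :: kernel(filters, channels, window), biases(filters)
  real :: input(channels, width), z(filters, width - window + 1)
  integer :: n, i

  call random_number(kernel)
  call random_number(input)
  call random_number(biases)

  do concurrent (n = 1:filters, i = 1:width - window + 1)
    ! Elementwise product of the n-th kernel with the current input
    ! window, summed over channels and window positions, plus the bias.
    z(n, i) = sum(kernel(n,:,:) * input(:, i:i+window-1)) + biases(n)
  end do

  print *, 'output_width =', size(z, dim=2)  ! width - window + 1 = 6
end program conv1d_forward_sketch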
