Locally connected still not working

ricor07 · ricor07 · commit ad592da82458 · 2025-02-16T16:05:09.000+01:00
diff --git a/example/cnn_mnist_1d.f90 b/example/cnn_mnist_1d.f90
@@ -20,7 +20,7 @@ program cnn_mnist
   
     net = network([ &
       input(784), &
-      reshape_generalized([1,784]), &
+      reshape_generalized([28,28]), &
       locally_connected_1d(filters=8, kernel_size=3, activation=relu()), &
       maxpool1d(pool_size=2), &
       locally_connected_1d(filters=16, kernel_size=3, activation=relu()), &
@@ -36,8 +36,8 @@ program cnn_mnist
         training_images, &
         label_digits(training_labels), &
         batch_size=16, &
-        epochs=1, &
-        optimizer=sgd(learning_rate=0.1) &
+        epochs=5, &
+        optimizer=sgd(learning_rate=0.003) &
       )
   
       print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( &
diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90
@@ -8,6 +8,7 @@
   use nf_input2d_layer, only: input2d_layer
   use nf_input3d_layer, only: input3d_layer
   use nf_locally_connected_1d_layer, only: locally_connected_1d_layer
+  use nf_maxpool1d_layer, only: maxpool1d_layer
   use nf_maxpool2d_layer, only: maxpool2d_layer
   use nf_reshape_layer, only: reshape3d_layer
   use nf_reshape_layer_generalized, only: reshape_generalized_layer
@@ -61,7 +62,29 @@ pure module subroutine backward_2d(self, previous, gradient)
 
     ! Backward pass from a 2-d layer downstream currently implemented
     ! only for dense and flatten layers
-    ! CURRENTLY NO LAYERS, tbd: pull/197 and pull/199
+    
+    select type(this_layer => self % p)
+
+      type is(locally_connected_1d_layer)
+
+        select type(prev_layer => previous % p)
+          type is(maxpool1d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(locally_connected_1d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
+
+      type is(maxpool1d_layer) 
+
+        select type(prev_layer => previous % p)
+          type is(maxpool1d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(locally_connected_1d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
+      
+      end select
+
   end subroutine backward_2d
 
 
@@ -152,6 +175,15 @@ pure module subroutine forward(self, input)
           type is(reshape3d_layer)
             call this_layer % forward(prev_layer % output)
         end select
+      
+      type is(maxpool1d_layer)
+
+        select type(prev_layer => input % p)
+          type is(locally_connected_1d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(maxpool1d_layer)
+            call this_layer % forward(prev_layer % output)
+        end select
 
       type is(maxpool2d_layer)
 
@@ -211,6 +243,8 @@ pure module subroutine get_output_1d(self, output)
         allocate(output, source=this_layer % output)
       type is(flatten_layer)
         allocate(output, source=this_layer % output)
+      type is(reshape_generalized_layer)
+        allocate(output, source=this_layer % output)
       class default
         error stop '1-d output can only be read from an input1d, dense, or flatten layer.'
 
@@ -227,8 +261,12 @@ pure module subroutine get_output_2d(self, output)
 
       type is(input2d_layer)
         allocate(output, source=this_layer % output)
+      type is(maxpool1d_layer)
+        allocate(output, source=this_layer % output)
       type is(locally_connected_1d_layer)
         allocate(output, source=this_layer % output)
+      !type is(reshape_generalized_layer)
+        !allocate(output, source=this_layer % output)
       class default
         error stop '1-d output can only be read from an input1d, dense, or flatten layer.'
 
@@ -279,6 +317,8 @@ impure elemental module subroutine init(self, input)
         self % layer_shape = shape(this_layer % output)
       type is(locally_connected_1d_layer)
         self % layer_shape = shape(this_layer % output)
+      type is(maxpool1d_layer)
+        self % layer_shape = shape(this_layer % output)
       type is(maxpool2d_layer)
         self % layer_shape = shape(this_layer % output)
       type is(flatten_layer)
@@ -324,6 +364,8 @@ elemental module function get_num_params(self) result(num_params)
         num_params = this_layer % get_num_params()
       type is (locally_connected_1d_layer)
         num_params = this_layer % get_num_params()
+      type is(maxpool1d_layer)
+        num_params = 0
       type is (maxpool2d_layer)
         num_params = 0
       type is (flatten_layer)
@@ -355,6 +397,8 @@ module function get_params(self) result(params)
         params = this_layer % get_params()
       type is (locally_connected_1d_layer)
         params = this_layer % get_params()
+      type is (maxpool1d_layer)
+        ! No parameters to get.
       type is (maxpool2d_layer)
         ! No parameters to get.
       type is (flatten_layer)
@@ -386,6 +430,8 @@ module function get_gradients(self) result(gradients)
         gradients = this_layer % get_gradients()
       type is (locally_connected_1d_layer)
         gradients = this_layer % get_gradients()
+      type is (maxpool1d_layer)
+        ! No gradients to get.
       type is (maxpool2d_layer)
         ! No gradients to get.
       type is (flatten_layer)
@@ -443,6 +489,11 @@ module subroutine set_params(self, params)
       
       type is (locally_connected_1d_layer)
         call this_layer % set_params(params)
+      
+      type is (maxpool1d_layer)
+        ! No parameters to set.
+        write(stderr, '(a)') 'Warning: calling set_params() ' &
+          // 'on a zero-parameter layer; nothing to do.'
 
       type is (maxpool2d_layer)
         ! No parameters to set.
diff --git a/src/nf/nf_locally_connected_1d_submodule.f90 b/src/nf/nf_locally_connected_1d_submodule.f90
@@ -36,9 +36,9 @@ module subroutine init(self, input_shape)
     ! Kernel of shape filters x channels x kernel_size
     allocate(self % kernel(self % filters, self % channels, self % kernel_size))
 
-    ! Initialize the kernel with random values
+    ! Initialize the kernel with normally distributed random values
     call random_normal(self % kernel)
-    self % kernel = self % kernel / self % kernel_size
+    self % kernel = self % kernel / self % kernel_size ** 2
 
     allocate(self % biases(self % filters))
     self % biases = 0
@@ -62,50 +62,112 @@ pure module subroutine forward(self, input)
     class(locally_connected_1d_layer), intent(in out) :: self
     real, intent(in) :: input(:,:)
     integer :: input_width, input_channels
-    integer :: i, n
+    integer :: i, n, i_out
+    integer :: iws, iwe
+    integer :: half_window
 
+    ! Get input dimensions
     input_channels = size(input, dim=1)
-    input_width = size(input, dim=2)
+    input_width    = size(input, dim=2)
 
-    do concurrent (i = 1:self % width)
-      do concurrent (n = 1:self % filters)
-        self % z(n,i) = sum(self % kernel(n,:,:)* input(:,i:i+self % kernel_size-1))
+    ! half_window = kernel_size / 2 (integer division); the window below matches the kernel only if kernel_size is odd
+    half_window = self % kernel_size / 2
+
+    ! Loop over output indices rather than input indices.
+    do i_out = 1, self % width
+      ! Compute the corresponding center index in the input.
+      i = i_out + half_window
+
+      ! Define the window in the input corresponding to the filter kernel
+      iws = i - half_window
+      iwe = i + half_window
+
+      ! Compute the inner tensor product (sum of element-wise products)
+      ! for each filter across all channels and positions in the kernel.
+      do concurrent(n = 1:self % filters)
+        self % z(n, i_out) = sum(self % kernel(n, :, :) * input(:, iws:iwe))
       end do
-    end do
 
-    ! Add bias
-    self % z = self % z + reshape(self % biases, shape(self % z))
+      ! Add the bias for each filter.
+      self % z(:, i_out) = self % z(:, i_out) + self % biases
+    end do
 
-    ! Apply activation
+    ! Apply the activation function to get the final output.
     self % output = self % activation % eval(self % z)
-
   end subroutine forward
 
+
   pure module subroutine backward(self, input, gradient)
     implicit none
     class(locally_connected_1d_layer), intent(in out) :: self
-    real, intent(in) :: input(:,:)
-    real, intent(in) :: gradient(:,:)
+    real, intent(in) :: input(:,:)     ! shape: (channels, width)
+    real, intent(in) :: gradient(:,:)  ! shape: (filters, width)
+    
+    ! Local gradient arrays:
     real :: db(self % filters)
     real :: dw(self % filters, self % channels, self % kernel_size)
-    real :: gdz(self % filters, self % width)
+    real :: gdz(self % filters, size(input, 2))
+    
     integer :: i, n, k
-
-    gdz = gradient * self % activation % eval_prime(self % z)
-
+    integer :: input_channels, input_width
+    integer :: istart, iend
+    integer :: iws, iwe
+    integer :: half_window
+  
+    ! Get input dimensions.
+    input_channels = size(input, dim=1)
+    input_width    = size(input, dim=2)
+  
+    ! For an odd-sized kernel, half_window = kernel_size / 2.
+    half_window = self % kernel_size / 2
+  
+    ! Define the valid output range so that the full input window is available.
+    istart = half_window + 1
+    iend   = input_width - half_window
+  
+    !---------------------------------------------------------------------
+    ! Compute the local gradient: gdz = (dL/dy) * sigma'(z)
+    ! We assume self%z stores the pre-activation values from the forward pass.
+    gdz = 0.0
+    gdz(:, istart:iend) = gradient(:, istart:iend) * self % activation % eval_prime(self % z(:, istart:iend))
+  
+    !---------------------------------------------------------------------
+    ! Compute gradient with respect to biases:
+    ! dL/db(n) = sum_{i in valid range} gdz(n, i)
     do concurrent (n = 1:self % filters)
-      db(n) = sum(gdz(n,:))
+      db(n) = sum(gdz(n, istart:iend))
     end do
-
-    dw = 0
-    self % gradient = 0
-    do concurrent (n = 1:self % filters, k = 1:self % channels, i = 1:self % width)
-      dw(n,k,:) = dw(n,k,:) + input(k, i:i+self % kernel_size-1) * gdz(n, i)
+  
+    ! Initialize weight gradient and input gradient accumulators.
+    dw = 0.0
+    self % gradient = 0.0  ! This array is assumed preallocated to shape (channels, width)
+  
+    !---------------------------------------------------------------------
+    ! Accumulate gradients over valid output positions.
+    ! For each output position i, determine the corresponding input window indices.
+    do concurrent (n = 1:self % filters, &
+                     k = 1:self % channels, &
+                     i = istart:iend)
+      ! The input window corresponding to output index i:
+      iws = i - half_window
+      iwe = i + half_window
+  
+      ! Weight gradient (dL/dw):
+      ! For each kernel element, the contribution is the product of the input in the window
+      ! and the local gradient at the output position i.
+      dw(n, k, :) = dw(n, k, :) + input(k, iws:iwe) * gdz(n, i)
+  
+      ! Input gradient (dL/dx):
+      ! Distribute the effect of the output gradient back onto the input window,
+      ! weighted by the kernel weights.
+      self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, i)
     end do
-
+  
+    !---------------------------------------------------------------------
+    ! Accumulate the computed gradients into the layer's stored gradients.
     self % dw = self % dw + dw
     self % db = self % db + db
-
+  
   end subroutine backward
 
   pure module function get_num_params(self) result(num_params)
diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90
@@ -73,9 +73,9 @@ module function network_from_layers(layers) result(res)
             type is(conv2d_layer)
               res % layers = [res % layers(:n-1), flatten(), res % layers(n:)]
               n = n + 1
-            !type is(locally_connected_1d_layer)
-              !res % layers = [res % layers(:n-1), flatten(), res % layers(n:)]
-              !n = n + 1
+            type is(locally_connected_1d_layer)
+              res % layers = [res % layers(:n-1), flatten(), res % layers(n:)]
+              n = n + 1
             type is(maxpool2d_layer)
               res % layers = [res % layers(:n-1), flatten(), res % layers(n:)]
               n = n + 1
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -2,6 +2,7 @@ foreach(execid
   input1d_layer
   input2d_layer
   input3d_layer
+  locally_connected_1d_layer
   parametric_activation
   dense_layer
   conv2d_layer
diff --git a/test/test_locally_connected_1d_layer.f90 b/test/test_locally_connected_1d_layer.f90
diff --git a/test/test_reshape_generalized_layer.f90 b/test/test_reshape_generalized_layer.f90