
Commit 9a40117

Authored Sep 3, 2024
Standard conformance fixes (#190)
* Make various functions impure to allow returning polymorphic allocatables
* Update tested compilers
* More on tested compilers
* Update CMake definitions to make serial default and add PARALLEL macro
1 parent 3872202 commit 9a40117

16 files changed: +84, -73 lines
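Most of the per-file changes below fall out of the first bullet. The Fortran standard forbids the result of a pure function from being both polymorphic and allocatable, or from having a polymorphic allocatable ultimate component, and newer compilers (notably flang-new) enforce this; since the layer constructors return a `type(layer)` holding a `class(...), allocatable` component, they must be impure. A minimal sketch of the offending pattern, with illustrative names rather than code from this commit:

```fortran
module layer_demo
  implicit none

  type :: base_layer
  end type base_layer

  ! Container with a polymorphic allocatable component,
  ! analogous to neural-fortran's layer type.
  type :: layer
    class(base_layer), allocatable :: p
  end type layer

contains

  ! Prefixing this with "pure" (or "elemental") is non-conforming, because
  ! the result has a polymorphic allocatable ultimate component; that is why
  ! the constructors in this commit drop their pure/elemental prefixes.
  function make_layer() result(res)
    type(layer) :: res
    allocate(res % p, source=base_layer())
  end function make_layer

end module layer_demo
```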
 

‎README.md

Lines changed: 8 additions & 7 deletions
````diff
@@ -63,9 +63,10 @@ Optional dependencies are:
 
 Compilers tested include:
 
-* gfortran-9.4.0
-* ifort-2021.4
-* ifx-2021.4
+* flang-new 20.0.0
+* gfortran 13.2.0, 14.0.1
+* ifort 2021.13.1
+* ifx 2024.2.1
 
 ### Building with fpm
 
@@ -85,7 +86,7 @@ Once installed, use the compiler wrappers `caf` and `cafrun` to build and execut
 in parallel, respectively:
 
 ```
-fpm build --compiler caf --profile release
+fpm build --compiler caf --profile release --flag "-cpp -DPARALLEL"
 ```
 
 #### Testing with fpm
@@ -107,7 +108,7 @@ See the [Fortran Package Manager](https://github.com/fortran-lang/fpm) for more
 ```
 mkdir build
 cd build
-cmake .. -DSERIAL=1
+cmake ..
 make
 ```
 
@@ -122,7 +123,7 @@ in parallel, respectively:
 
 
 ```
-FC=caf cmake ..
+FC=caf cmake .. -DPARALLEL
 make
 cafrun -n 4 bin/mnist # run MNIST example on 4 cores
 ```
@@ -139,7 +140,7 @@ FC=ifort cmake ..
 for a parallel build of neural-fortran, or
 
 ```
-FC=ifort cmake .. -DSERIAL=1
+FC=ifort cmake ..
 ```
 
 for a serial build.
````
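As a usage note, `-DPARALLEL` passed through `-cpp`/`-fpp` defines an ordinary preprocessor symbol, so code can branch on it; a hypothetical example, not part of the repository:

```fortran
program build_mode
  implicit none
#ifdef PARALLEL
  ! Coarray build, e.g. fpm build --compiler caf --flag "-cpp -DPARALLEL"
  print '(a,i0,a,i0)', 'Image ', this_image(), ' of ', num_images()
#else
  ! Serial build, the new default.
  print '(a)', 'Running serially'
#endif
end program build_mode
```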

‎cmake/compilers.cmake

Lines changed: 20 additions & 18 deletions
```diff
@@ -1,11 +1,12 @@
 # compiler flags for gfortran
 if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
 
-  if(SERIAL)
-    message(STATUS "Configuring to build with -fcoarray=single")
-    add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-fcoarray=single>")
+  if(PARALLEL)
+    message(STATUS "Configuring to build with -fcoarray=shared")
+    add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-fcoarray=shared>")
+    add_compile_definitions(PARALLEL)
   else()
-    add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-fcoarray=lib>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-fcoarray=single>")
   endif()
 
   if(BLAS)
@@ -14,21 +15,22 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
     message(STATUS "Configuring build to use BLAS from ${BLAS}")
   endif()
 
-  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Debug>>:-fcheck=bounds;-fbacktrace>")
-  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Release>>:-Ofast;-fno-frontend-optimize;-fno-backtrace>")
+  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Debug>>:-cpp;-fcheck=bounds;-fbacktrace>")
+  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Release>>:-cpp;-Ofast;-fno-frontend-optimize;-fno-backtrace>")
 
 elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^Intel")
   # compiler flags for ifort
 
-  if(SERIAL)
-    message(STATUS "Configuring to build with -coarray=single")
+  if(PARALLEL)
+    message(STATUS "Configuring to build with -coarray=shared")
     if(WIN32)
-      add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:/Qcoarray:single>")
-      add_link_options("$<$<COMPILE_LANGUAGE:Fortran>:/Qcoarray:single>")
+      add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:/Qcoarray:shared>")
+      add_link_options("$<$<COMPILE_LANGUAGE:Fortran>:/Qcoarray:shared>")
     else()
-      add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-coarray=single>")
-      add_link_options("$<$<COMPILE_LANGUAGE:Fortran>:-coarray=single>")
+      add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-coarray=shared>")
+      add_link_options("$<$<COMPILE_LANGUAGE:Fortran>:-coarray=shared>")
     endif()
+    add_compile_definitions(PARALLEL)
   else()
     if(WIN32)
       add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:/Qcoarray:shared>")
@@ -40,16 +42,16 @@ elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^Intel")
   endif()
 
   if(WIN32)
-    string(APPEND CMAKE_Fortran_FLAGS " /assume:byterecl")
+    string(APPEND CMAKE_Fortran_FLAGS " /assume:byterecl /fpp")
   else()
-    string(APPEND CMAKE_Fortran_FLAGS " -assume byterecl")
+    string(APPEND CMAKE_Fortran_FLAGS " -assume byterecl -fpp")
   endif()
-  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Debug>>:-check;-traceback>")
-  # add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Release>>:-O3>")
+  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Debug>>:-fpp;-check;-traceback>")
+  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Release>>:-fpp;-O3>")
 
 elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
   # compiler flags for Cray ftn
   string(APPEND CMAKE_Fortran_FLAGS " -h noomp")
-  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Debug>>:-O0;-g>")
-  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Release>>:-O3>")
+  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Debug>>:-e Z;-O0;-g>")
+  add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:Fortran>,$<CONFIG:Release>>:-e Z;-O3>")
 endif()
```

‎cmake/options.cmake

Lines changed: 4 additions & 4 deletions
```diff
@@ -1,4 +1,4 @@
-option(SERIAL "Serial execution")
+option(PARALLEL "Parallel execution")
 option(${PROJECT_NAME}_BUILD_TESTING "build ${PROJECT_NAME} tests" true)
 option(${PROJECT_NAME}_BUILD_EXAMPLES "build ${PROJECT_NAME} examples" true)
 
@@ -8,10 +8,10 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
-if(SERIAL)
-  message(STATUS "Configuring build for serial execution")
-else()
+if(PARALLEL)
   message(STATUS "Configuring build for parallel execution")
+else()
+  message(STATUS "Configuring build for serial execution; configure with -DPARALLEL=1 for a parallel build")
 endif()
 
 # --- Generally useful CMake project options
```

‎example/cnn_mnist.f90

Lines changed: 2 additions & 3 deletions
```diff
@@ -40,9 +40,8 @@ program cnn_mnist
       optimizer=sgd(learning_rate=3.) &
     )
 
-    if (this_image() == 1) &
-      print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( &
-        net, validation_images, label_digits(validation_labels)) * 100, ' %'
+    print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( &
+      net, validation_images, label_digits(validation_labels)) * 100, ' %'
 
   end do epochs
 
```

‎example/dense_mnist.f90

Lines changed: 5 additions & 7 deletions
```diff
@@ -24,9 +24,8 @@ program dense_mnist
 
   call net % print_info()
 
-  if (this_image() == 1) &
-    print '(a,f5.2,a)', 'Initial accuracy: ', accuracy( &
-      net, validation_images, label_digits(validation_labels)) * 100, ' %'
+  print '(a,f5.2,a)', 'Initial accuracy: ', accuracy( &
+    net, validation_images, label_digits(validation_labels)) * 100, ' %'
 
   epochs: do n = 1, num_epochs
 
@@ -44,10 +43,9 @@
     ! 2 metrics; 1st is default loss function (quadratic), other is Pearson corr.
     output_metrics = net % evaluate(validation_images, label_digits(validation_labels), metric=corr())
     mean_metrics = sum(output_metrics, 1) / size(output_metrics, 1)
-    if (this_image() == 1) &
-      print '(a,i2,3(a,f6.3))', 'Epoch ', n, ' done, Accuracy: ', &
-        accuracy(net, validation_images, label_digits(validation_labels)) * 100, &
-        '%, Loss: ', mean_metrics(1), ', Pearson correlation: ', mean_metrics(2)
+    print '(a,i2,3(a,f6.3))', 'Epoch ', n, ' done, Accuracy: ', &
+      accuracy(net, validation_images, label_digits(validation_labels)) * 100, &
+      '%, Loss: ', mean_metrics(1), ', Pearson correlation: ', mean_metrics(2)
   end block
 
 end do epochs
```

‎src/nf/nf_activation.f90

Lines changed: 1 addition & 1 deletion
```diff
@@ -557,7 +557,7 @@ pure function eval_3d_celu_prime(self, x) result(res)
     end where
   end function
 
-  pure function get_activation_by_name(activation_name) result(res)
+  function get_activation_by_name(activation_name) result(res)
     ! Workaround to get activation_function with some
     ! hardcoded default parameters by its name.
     ! Need this function since we get only activation name
```

‎src/nf/nf_conv2d_layer.f90

Lines changed: 1 addition & 1 deletion
```diff
@@ -41,7 +41,7 @@ module nf_conv2d_layer
   end type conv2d_layer
 
   interface conv2d_layer
-    pure module function conv2d_layer_cons(filters, kernel_size, activation) &
+    module function conv2d_layer_cons(filters, kernel_size, activation) &
       result(res)
       !! `conv2d_layer` constructor function
       integer, intent(in) :: filters
```

‎src/nf/nf_conv2d_layer_submodule.f90

Lines changed: 1 addition & 1 deletion
```diff
@@ -7,7 +7,7 @@
 
 contains
 
-  pure module function conv2d_layer_cons(filters, kernel_size, activation) result(res)
+  module function conv2d_layer_cons(filters, kernel_size, activation) result(res)
     implicit none
     integer, intent(in) :: filters
     integer, intent(in) :: kernel_size
```

‎src/nf/nf_dense_layer.f90

Lines changed: 1 addition & 1 deletion
```diff
@@ -42,7 +42,7 @@ module nf_dense_layer
   end type dense_layer
 
   interface dense_layer
-    elemental module function dense_layer_cons(output_size, activation) &
+    module function dense_layer_cons(output_size, activation) &
       result(res)
       !! This function returns the `dense_layer` instance.
      integer, intent(in) :: output_size
```

‎src/nf/nf_dense_layer_submodule.f90

Lines changed: 3 additions & 1 deletion
```diff
@@ -8,7 +8,7 @@
 
 contains
 
-  elemental module function dense_layer_cons(output_size, activation) &
+  module function dense_layer_cons(output_size, activation) &
     result(res)
     integer, intent(in) :: output_size
     class(activation_function), intent(in) :: activation
@@ -129,7 +129,9 @@ module subroutine init(self, input_shape)
     self % weights = self % weights / self % input_size
 
     ! Broadcast weights to all other images, if any.
+#ifdef PARALLEL
     call co_broadcast(self % weights, 1)
+#endif
 
     allocate(self % biases(self % output_size))
     self % biases = 0
```
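The `#ifdef PARALLEL` guard above is the commit's general recipe for keeping collective subroutines such as `co_broadcast` out of serial builds, where a compiler without coarray support would reject them. A standalone sketch of the same pattern (illustrative, assuming a preprocessed build):

```fortran
subroutine init_weights(weights)
  real, intent(in out) :: weights(:)
  call random_number(weights)
#ifdef PARALLEL
  ! Compiled only with -DPARALLEL: make every image start
  ! from image 1's randomly initialized weights.
  call co_broadcast(weights, source_image=1)
#endif
end subroutine init_weights
```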

‎src/nf/nf_layer_constructors.f90

Lines changed: 7 additions & 7 deletions
```diff
@@ -12,7 +12,7 @@ module nf_layer_constructors
 
   interface input
 
-    pure module function input1d(layer_size) result(res)
+    module function input1d(layer_size) result(res)
       !! 1-d input layer constructor.
       !!
       !! This layer is for inputting 1-d data to the network.
@@ -35,7 +35,7 @@ pure module function input1d(layer_size) result(res)
         !! Resulting layer instance
     end function input1d
 
-    pure module function input3d(layer_shape) result(res)
+    module function input3d(layer_shape) result(res)
       !! 3-d input layer constructor.
       !!
       !! This layer is for inputting 3-d data to the network.
@@ -62,7 +62,7 @@ end function input3d
 
   interface
 
-    pure module function dense(layer_size, activation) result(res)
+    module function dense(layer_size, activation) result(res)
       !! Dense (fully-connected) layer constructor.
       !!
       !! This layer is a building block for dense, fully-connected networks,
@@ -85,7 +85,7 @@ pure module function dense(layer_size, activation) result(res)
         !! Resulting layer instance
     end function dense
 
-    pure module function flatten() result(res)
+    module function flatten() result(res)
       !! Flatten (3-d -> 1-d) layer constructor.
       !!
       !! Use this layer to chain layers with 3-d outputs to layers with 1-d
@@ -106,7 +106,7 @@ pure module function flatten() result(res)
         !! Resulting layer instance
     end function flatten
 
-    pure module function conv2d(filters, kernel_size, activation) result(res)
+    module function conv2d(filters, kernel_size, activation) result(res)
       !! 2-d convolutional layer constructor.
       !!
       !! This layer is for building 2-d convolutional network.
@@ -133,7 +133,7 @@ pure module function conv2d(filters, kernel_size, activation) result(res)
         !! Resulting layer instance
     end function conv2d
 
-    pure module function maxpool2d(pool_size, stride) result(res)
+    module function maxpool2d(pool_size, stride) result(res)
       !! 2-d maxpooling layer constructor.
       !!
       !! This layer is for downscaling other layers, typically `conv2d`.
@@ -155,7 +155,7 @@ pure module function maxpool2d(pool_size, stride) result(res)
         !! Resulting layer instance
     end function maxpool2d
 
-    pure module function reshape(output_shape) result(res)
+    module function reshape(output_shape) result(res)
       !! Rank-1 to rank-any reshape layer constructor.
       !! Currently implemented is only rank-3 for the output of the reshape.
       !!
```

‎src/nf/nf_layer_constructors_submodule.f90

Lines changed: 7 additions & 7 deletions
```diff
@@ -14,7 +14,7 @@
 
 contains
 
-  pure module function conv2d(filters, kernel_size, activation) result(res)
+  module function conv2d(filters, kernel_size, activation) result(res)
    integer, intent(in) :: filters
    integer, intent(in) :: kernel_size
    class(activation_function), intent(in), optional :: activation
@@ -40,7 +40,7 @@ pure module function conv2d(filters, kernel_size, activation) result(res)
   end function conv2d
 
 
-  pure module function dense(layer_size, activation) result(res)
+  module function dense(layer_size, activation) result(res)
    integer, intent(in) :: layer_size
    class(activation_function), intent(in), optional :: activation
    type(layer) :: res
@@ -63,14 +63,14 @@ pure module function dense(layer_size, activation) result(res)
   end function dense
 
 
-  pure module function flatten() result(res)
+  module function flatten() result(res)
    type(layer) :: res
    res % name = 'flatten'
    allocate(res % p, source=flatten_layer())
   end function flatten
 
 
-  pure module function input1d(layer_size) result(res)
+  module function input1d(layer_size) result(res)
    integer, intent(in) :: layer_size
    type(layer) :: res
    res % name = 'input'
@@ -81,7 +81,7 @@ pure module function input1d(layer_size) result(res)
   end function input1d
 
 
-  pure module function input3d(layer_shape) result(res)
+  module function input3d(layer_shape) result(res)
    integer, intent(in) :: layer_shape(3)
    type(layer) :: res
    res % name = 'input'
@@ -91,7 +91,7 @@ pure module function input3d(layer_shape) result(res)
    res % initialized = .true.
   end function input3d
 
-  pure module function maxpool2d(pool_size, stride) result(res)
+  module function maxpool2d(pool_size, stride) result(res)
    integer, intent(in) :: pool_size
    integer, intent(in), optional :: stride
    integer :: stride_
@@ -119,7 +119,7 @@ pure module function maxpool2d(pool_size, stride) result(res)
 
   end function maxpool2d
 
-  pure module function reshape(output_shape) result(res)
+  module function reshape(output_shape) result(res)
    integer, intent(in) :: output_shape(:)
    type(layer) :: res
 
```

‎src/nf/nf_metrics.f90

Lines changed: 1 addition & 1 deletion
```diff
@@ -36,7 +36,7 @@ end function metric_interface
 
 contains
 
-  pure module function corr_eval(true, predicted) result(res)
+  pure function corr_eval(true, predicted) result(res)
     !! Pearson correlation function:
     !!
     real, intent(in) :: true(:)
```

‎src/nf/nf_network.f90

Lines changed: 4 additions & 4 deletions
```diff
@@ -77,7 +77,7 @@ end function evaluate_batch_1d
 
   interface forward
 
-    pure module subroutine forward_1d(self, input)
+    module subroutine forward_1d(self, input)
       !! Apply a forward pass through the network.
       !!
       !! This changes the state of layers on the network.
@@ -91,7 +91,7 @@ pure module subroutine forward_1d(self, input)
         !! 1-d input data
     end subroutine forward_1d
 
-    pure module subroutine forward_3d(self, input)
+    module subroutine forward_3d(self, input)
       !! Apply a forward pass through the network.
       !!
       !! This changes the state of layers on the network.
@@ -153,7 +153,7 @@ end function predict_batch_3d
 
   interface
 
-    pure module subroutine backward(self, output, loss)
+    module subroutine backward(self, output, loss)
       !! Apply one backward pass through the network.
       !! This changes the state of layers on the network.
       !! Typically used only internally from the `train` method,
@@ -166,7 +166,7 @@ pure module subroutine backward(self, output, loss)
         !! Loss instance to use. If not provided, the default is quadratic().
     end subroutine backward
 
-    pure module integer function get_num_params(self)
+    module integer function get_num_params(self)
       !! Get the number of parameters in the network.
      class(network), intent(in) :: self
        !! Network instance
```

‎src/nf/nf_network_submodule.f90

Lines changed: 14 additions & 10 deletions
```diff
@@ -93,7 +93,7 @@ module function network_from_layers(layers) result(res)
   end function network_from_layers
 
 
-  pure module subroutine backward(self, output, loss)
+  module subroutine backward(self, output, loss)
    class(network), intent(in out) :: self
    real, intent(in) :: output(:)
    class(loss_type), intent(in), optional :: loss
@@ -167,20 +167,20 @@ module function evaluate_batch_1d(self, input_data, output_data, metric) result(
 
     allocate(res(size(output, dim=1), n))
 
-    do concurrent (i = 1:size(output, dim=1))
+    do i = 1, size(output, dim=1)
       res(i,1) = self % loss % eval(output_data(i,:), output(i,:))
     end do
 
     if (.not. present(metric)) return
 
-    do concurrent (i = 1:size(output, dim=1))
+    do i = 1, size(output, dim=1)
       res(i,2) = metric % eval(output_data(i,:), output(i,:))
     end do
 
   end function evaluate_batch_1d
 
 
-  pure module subroutine forward_1d(self, input)
+  module subroutine forward_1d(self, input)
    class(network), intent(in out) :: self
    real, intent(in) :: input(:)
    integer :: n
@@ -197,7 +197,7 @@ pure module subroutine forward_1d(self, input)
   end subroutine forward_1d
 
 
-  pure module subroutine forward_3d(self, input)
+  module subroutine forward_3d(self, input)
    class(network), intent(in out) :: self
    real, intent(in) :: input(:,:,:)
    integer :: n
@@ -273,7 +273,7 @@ module function predict_batch_1d(self, input) result(res)
 
     allocate(res(output_size, batch_size))
 
-    batch: do concurrent(i = 1:size(res, dim=2))
+    batch: do i = 1, size(res, dim=2)
 
       call self % forward(input(:,i))
 
@@ -303,7 +303,7 @@ module function predict_batch_3d(self, input) result(res)
 
     allocate(res(output_size, batch_size))
 
-    batch: do concurrent(i = 1:batch_size)
+    batch: do i = 1, batch_size
 
       call self % forward(input(:,:,:,i))
 
@@ -330,7 +330,7 @@ module subroutine print_info(self)
   end subroutine print_info
 
 
-  pure module function get_num_params(self)
+  module function get_num_params(self)
    class(network), intent(in) :: self
    integer :: get_num_params
 
@@ -443,15 +443,17 @@ module subroutine train(self, input_data, output_data, batch_size, &
       call random_number(pos)
       batch_start = int(pos * (dataset_size - batch_size + 1)) + 1
 
+#ifdef PARALLEL
       ! FIXME shuffle in a way that doesn't require co_broadcast
       call co_broadcast(batch_start, 1)
+#endif
 
       ! Distribute the batch in nearly equal pieces to all images
       indices = tile_indices(batch_size)
       istart = indices(1) + batch_start - 1
       iend = indices(2) + batch_start - 1
 
-      do concurrent(j = istart:iend)
+      do j = istart, iend
        call self % forward(input_data(:,j))
        call self % backward(output_data(:,j))
       end do
@@ -494,6 +496,7 @@ module subroutine update(self, optimizer, batch_size)
       batch_size_ = 1
     end if
 
+#ifdef PARALLEL
     ! Sum weight and bias gradients across images, if any
     do n = 2, size(self % layers)
       select type(this_layer => self % layers(n) % p)
@@ -505,13 +508,14 @@
           call co_sum(this_layer % db)
       end select
     end do
+#endif
 
     params = self % get_params()
     call self % optimizer % minimize(params, self % get_gradients() / batch_size_)
     call self % set_params(params)
 
     ! Flush network gradients to zero.
-    do concurrent(n = 2:size(self % layers))
+    do n = 2, size(self % layers)
       select type(this_layer => self % layers(n) % p)
       type is(dense_layer)
         this_layer % dw = 0
```
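The `do concurrent` loops above had to become plain `do` loops: a statement in a `do concurrent` construct may not reference an impure procedure, and `forward`/`backward` are impure as of this commit. A small self-contained illustration of the rule, using the impure intrinsic `random_number` as a stand-in:

```fortran
program do_concurrent_purity
  implicit none
  integer :: j
  real :: x(3)

  ! Non-conforming: random_number is impure, so compilers reject it inside
  ! do concurrent -- the same rule that applies to impure forward/backward:
  !
  !   do concurrent (j = 1:3)
  !     call random_number(x(j))
  !   end do

  ! A plain do loop places no purity requirement on its body.
  do j = 1, 3
    call random_number(x(j))
  end do

  print *, x

end program do_concurrent_purity
```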

‎src/nf/nf_parallel_submodule.f90

Lines changed: 5 additions & 0 deletions
```diff
@@ -1,3 +1,8 @@
+#ifndef PARALLEL
+#define num_images() 1
+#define this_image() 1
+#endif
+
 submodule(nf_parallel) nf_parallel_submodule
 implicit none
 contains
```
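These fallback macros let the same source reference `this_image()` and `num_images()` whether or not the build has coarray support. A hypothetical caller (not from the repository), compiled with `-cpp`, shows the effect; in a serial build both macros expand to 1 and the range degenerates to the whole workload:

```fortran
#ifndef PARALLEL
#define num_images() 1
#define this_image() 1
#endif

program tiles
  implicit none
  integer :: is, ie
  call tile_range(100, is, ie)
  print '(a,i0,a,i0)', 'this image owns ', is, '..', ie
contains
  ! Illustrative work partitioner in the spirit of nf_parallel's tile_indices.
  subroutine tile_range(n, istart, iend)
    integer, intent(in) :: n
    integer, intent(out) :: istart, iend
    integer :: chunk
    chunk = (n + num_images() - 1) / num_images()
    istart = (this_image() - 1) * chunk + 1
    iend = min(n, this_image() * chunk)
  end subroutine tile_range
end program tiles
```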
