Add modular precision update (#632)

* Replace kind(0d0) with wp declared in common * Replace instances of 0d0 and d0 with wp * Replace MPI_DOUBLE_PRECISION with constant declared in common, fix bugs with syscheck module * Incorporate patch file changes only, improve m_precision_select * implement fixes and expand to cover more cases of double precision * Update m_precision_select.f90 Check if old method of checking MPI support passes mpi tests * attempt fix for double precision with mpi support * Update syscheck.fpp * check single precision to see if working with MPI * Test to see if benchmark build working on gpu/attempt to fix error with m_fttw * test single precision on gpus, add flags to choose between single and double * single precision gpu test fix * add flags and distinguish between double and single precision at build time, to be tested more extensively * fix formatting * test single precision on test suite again due to NaN issue not ready for ci yet * Most recent * fix bug in eigen solver module * update eigen_solvers * handle cases with NaNs after finding source of error * fix NaN issue without skipping and retry each test 3 times in single precision * adds sp benchmarking CI to ensure speedup in single precision * add deletion back to bench.yml * fix issues with first merge * fix issues with second merge * fix Benchmarking Speedup CI * just test bench.yml changes * fix changes to CI * fix small issue * more fixes * another small CI fix * CI fix * CI fix * revert CI changes * Fix revert * fix * fix * fix * fix * final commit, remove last remaining warnings * nevermind, small fix * Update lint-source.yml * Update lint-source.yml * Update lint-source.yml * fix missing precision, satiate linter * satiate * hack mixlayer_perturb issue, just so i can test CI * add ci to gpu * small CI fix * CI fix * hopefully last commit * chemistry * fix minor issues * fix issue with IBM * format * Update m_model.fpp * satiate linter * add documentation * fix some missing ones * add grep check * fix blunder * 
cleanup * fix linter a bit * Update README.md * Discard changes to examples/3D_performance_test/case.py * Discard changes to .github/workflows/phoenix/bench.sh * Discard changes to .github/workflows/phoenix/submit.sh * Discard changes to .github/workflows/phoenix/test.sh * Discard changes to .github/workflows/bench.yml --------- Co-authored-by: Archith Iyer <[email protected]> Co-authored-by: Krishnan Iyer <[email protected]> Co-authored-by: Spencer Bryngelson <[email protected]> Co-authored-by: Archith Iyer <[email protected]> Co-authored-by: Archith Iyer <[email protected]> Co-authored-by: Spencer Bryngelson <[email protected]>
MFlowCode · Dec 16, 2024 · 635d86f · 635d86f
1 parent d3a852d
commit 635d86f
Showing 76 changed files with 4,730 additions and 4,562 deletions.
diff --git a/.github/workflows/lint-source.yml b/.github/workflows/lint-source.yml
@@ -29,4 +29,11 @@ jobs:
       run: pip install fortitude-lint ansi2txt
 
     - name: Lint the source code
-      run:  fortitude check --ignore=E001,S001,S101,M011,F001,S041,T001 ./src/*/* || true
+      run:  fortitude check --file-extensions=f90,fpp,fypp --ignore=E001,S001,S101,M011,F001,S041,T001 ./src/** || true
+
+    - name: Ensure kind is specified
+      run:  fortitude check --file-extensions=f90,fpp,fypp --select=P001 ./src/**
+
+    - name: No double precision intrinsics
+      run: |
+        ! grep -iR 'dexp\|dlog\|dble\|dabs\|double\ precision\|real(8)\|real(4)\|dprod\|dmin\|dmax\|dfloat\|dreal\|dcos\|dsin\|dtan\|dsign\|dtanh\|dsinh\|dcosh\|\.d0\|[0-9]d0' --exclude-dir=syscheck ./src/*
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -26,6 +26,7 @@ jobs:
       matrix:
         os:    ['ubuntu', 'macos']
         mpi:   ['mpi']
+        precision: ['']
         debug: ['debug', 'no-debug']
         intel: [true, false]
         exclude:
@@ -35,6 +36,7 @@ jobs:
         include:
           - os:    ubuntu
             mpi:   no-mpi
+            precision: single
             debug: no-debug
             intel: false
 
@@ -86,7 +88,7 @@ jobs:
       - name: Build
         run:  |
           if [ '${{ matrix.intel }}' == 'true' ]; then . /opt/intel/oneapi/setvars.sh; fi
-          /bin/bash mfc.sh build -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} 
+          /bin/bash mfc.sh build -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} --${{ matrix.precision }} 
 
       - name: Test
         run:  |
@@ -140,4 +142,4 @@ jobs:
         if:   always()
         with:
           name: logs-${{ strategy.job-index }}-${{ matrix.device }}
-          path: test-${{ matrix.device }}.out
+          path: test-${{ matrix.device }}.out
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -26,6 +26,7 @@ option(MFC_POST_PROCESS  "Build post_process"                                OFF
 option(MFC_SYSCHECK      "Build syscheck"                                    OFF)
 option(MFC_DOCUMENTATION "Build documentation"                               OFF)
 option(MFC_ALL           "Build everything"                                  OFF)
+option(MFC_SINGLE_PRECISION "Build single precision"                         OFF)
 
 if (MFC_ALL)
     set(MFC_PRE_PROCESS   ON FORCE)
@@ -34,6 +35,12 @@ if (MFC_ALL)
     set(MFC_DOCUMENTATION ON FORCE)
 endif()
 
+if (MFC_SINGLE_PRECISION)
+    add_compile_definitions(MFC_SINGLE_PRECISION)
+else()
+    add_compile_definitions(MFC_DOUBLE_PRECISION)
+endif()
+
 
 # CMake Library Imports
 

diff --git a/README.md b/README.md
@@ -161,6 +161,7 @@ They are organized below. Just click the drop-downs!
 	* \>66K AMD GPUs on the first exascale computer, [OLCF Frontier](https://www.olcf.ornl.gov/frontier/) (AMD MI250X-based)
 * Near compute roofline behavior
 * RDMA (remote data memory access; GPU-GPU direct communication) via GPU-aware MPI on NVIDIA (CUDA-aware MPI) and AMD GPU systems
+* Optional single-precision computation and storage
 </details>
 
 <details>

diff --git a/docs/documentation/getting-started.md b/docs/documentation/getting-started.md
@@ -126,6 +126,7 @@ MFC can be built with support for various (compile-time) features:
 | **Debug**          | `--debug`   | `--no-debug`   | Off     | Requests the compiler build MFC in debug mode.                  |
 | **GCov**           | `--gcov`    | `--no-gcov`    | Off     | Builds MFC with coverage flags on.                              |
 | **Unified Memory** | `--unified` | `--no-unified` | Off     | Builds MFC with unified CPU/GPU memory (GH-200 superchip only)  |
+| **Single**         | `--single`  | `--no-single`  | Off     | Builds MFC in single precision                                  |
 
 _⚠️ The `--gpu` option requires that your compiler supports OpenACC for Fortran for your target GPU architecture._
 

diff --git a/misc/m_silo_proxy.f90 b/misc/m_silo_proxy.f90
@@ -189,9 +189,9 @@ function DBPUTQM(dbid, name, lname, xname, lxname, yname, lyname, & !! -
         integer, intent(IN) :: lyname
         character(LEN=*), intent(IN) :: zname
         integer, intent(IN) :: lzname
-        real(kind(0d0)), dimension(:), intent(IN) :: x
-        real(kind(0d0)), dimension(:), intent(IN) :: y
-        real(kind(0d0)), dimension(:), intent(IN) :: z
+        real(wp), dimension(:), intent(IN) :: x
+        real(wp), dimension(:), intent(IN) :: y
+        real(wp), dimension(:), intent(IN) :: z
         integer, dimension(:), intent(IN) :: dims
         integer, intent(IN) :: ndims
         integer, intent(IN) :: datatype
@@ -215,8 +215,8 @@ function DBPUTCURVE(dbid, curvename, lcurvename, xvals, yvals, & !! ----
         integer, intent(IN) :: dbid
         character(LEN=*), intent(IN) :: curvename
         integer, intent(IN) :: lcurvename
-        real(kind(0d0)), dimension(:), intent(IN) :: xvals
-        real(kind(0d0)), dimension(:), intent(IN) :: yvals
+        real(wp), dimension(:), intent(IN) :: xvals
+        real(wp), dimension(:), intent(IN) :: yvals
         integer, intent(IN) :: datatype
         integer, intent(IN) :: npoints
         integer, intent(IN) :: optlist_id
@@ -264,7 +264,7 @@ function DBPUTQV1(dbid, name, lname, meshname, lmeshname, var, & !! ----
         integer, intent(IN) :: lname
         character(LEN=*), intent(IN) :: meshname
         integer, intent(IN) :: lmeshname
-        real(kind(0d0)), dimension(:, :, :), intent(IN) :: var
+        real(wp), dimension(:, :, :), intent(IN) :: var
         integer, dimension(:), intent(IN) :: dims
         integer, intent(IN) :: ndims
         integer, intent(IN) :: mixvar

diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp
@@ -92,8 +92,8 @@
     end if
 #:enddef
 
-#define t_vec3   real(kind(0d0)), dimension(1:3)
-#define t_mat4x4 real(kind(0d0)), dimension(1:4,1:4)
+#define t_vec3   real(wp), dimension(1:3)
+#define t_mat4x4 real(wp), dimension(1:4,1:4)
 
 #:def ASSERT(predicate, message = None)
     if (.not. (${predicate}$)) then

diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp
@@ -18,7 +18,7 @@ module m_checker_common
 
     implicit none
 
-    private; public :: s_check_inputs_common
+    private; public :: s_check_inputs_common, wp
 
 contains
 
@@ -61,7 +61,7 @@ contains
         !! Called by s_check_inputs_common for simulation and post-processing
     subroutine s_check_inputs_time_stepping
         if (cfl_dt) then
-            @:PROHIBIT(cfl_target < 0 .or. cfl_target > 1d0)
+            @:PROHIBIT(cfl_target < 0 .or. cfl_target > 1._wp)
             @:PROHIBIT(t_stop <= 0)
             @:PROHIBIT(t_save <= 0)
             @:PROHIBIT(t_save > t_stop)
@@ -144,10 +144,10 @@ contains
         @:PROHIBIT(relax .and. model_eqns /= 3, "phase change requires model_eqns = 3")
         @:PROHIBIT(relax .and. relax_model < 0, "relax_model must be in between 0 and 6")
         @:PROHIBIT(relax .and. relax_model > 6, "relax_model must be in between 0 and 6")
-        @:PROHIBIT(relax .and. palpha_eps <= 0d0, "palpha_eps must be positive")
-        @:PROHIBIT(relax .and. palpha_eps >= 1d0, "palpha_eps must be less than 1")
-        @:PROHIBIT(relax .and. ptgalpha_eps <= 0d0, "ptgalpha_eps must be positive")
-        @:PROHIBIT(relax .and. ptgalpha_eps >= 1d0, "ptgalpha_eps must be less than 1")
+        @:PROHIBIT(relax .and. palpha_eps <= 0._wp, "palpha_eps must be positive")
+        @:PROHIBIT(relax .and. palpha_eps >= 1._wp, "palpha_eps must be less than 1")
+        @:PROHIBIT(relax .and. ptgalpha_eps <= 0._wp, "ptgalpha_eps must be positive")
+        @:PROHIBIT(relax .and. ptgalpha_eps >= 1._wp, "ptgalpha_eps must be less than 1")
         @:PROHIBIT((.not. relax) .and. &
             ((relax_model /= dflt_int) .or. (.not. f_is_default(palpha_eps)) .or. (.not. f_is_default(ptgalpha_eps))), &
             "relax is not set as true, but other phase change parameters have been modified. " // &
@@ -262,27 +262,27 @@ contains
 
         do i = 1, num_fluids
             call s_int_to_str(i, iStr)
-            @:PROHIBIT(.not. f_is_default(fluid_pp(i)%gamma) .and. fluid_pp(i)%gamma <= 0d0, &
+            @:PROHIBIT(.not. f_is_default(fluid_pp(i)%gamma) .and. fluid_pp(i)%gamma <= 0._wp, &
                 "fluid_pp("//trim(iStr)//")%gamma must be positive")
 
             @:PROHIBIT(model_eqns == 1 .and. (.not. f_is_default(fluid_pp(i)%gamma)), &
                 "model_eqns = 1 does not support fluid_pp("//trim(iStr)//")%gamma")
 
-            @:PROHIBIT((i <= num_fluids + bub_fac .and. fluid_pp(i)%gamma <= 0d0) .or. &
+            @:PROHIBIT((i <= num_fluids + bub_fac .and. fluid_pp(i)%gamma <= 0._wp) .or. &
                 (i > num_fluids + bub_fac .and. (.not. f_is_default(fluid_pp(i)%gamma))), &
                 "for fluid_pp("//trim(iStr)//")%gamma")
 
-            @:PROHIBIT(.not. f_is_default(fluid_pp(i)%pi_inf) .and. fluid_pp(i)%pi_inf < 0d0, &
+            @:PROHIBIT(.not. f_is_default(fluid_pp(i)%pi_inf) .and. fluid_pp(i)%pi_inf < 0._wp, &
                 "fluid_pp("//trim(iStr)//")%pi_inf must be non-negative")
 
             @:PROHIBIT(model_eqns == 1 .and. (.not. f_is_default(fluid_pp(i)%pi_inf)), &
                 "model_eqns = 1 does not support fluid_pp("//trim(iStr)//")%pi_inf")
 
-            @:PROHIBIT((i <= num_fluids + bub_fac .and. fluid_pp(i)%pi_inf < 0d0) .or. &
+            @:PROHIBIT((i <= num_fluids + bub_fac .and. fluid_pp(i)%pi_inf < 0._wp) .or. &
                 (i > num_fluids + bub_fac .and. (.not. f_is_default(fluid_pp(i)%pi_inf))), &
                 "for fluid_pp("//trim(iStr)//")%pi_inf")
 
-            @:PROHIBIT(fluid_pp(i)%cv < 0d0, &
+            @:PROHIBIT(fluid_pp(i)%cv < 0._wp, &
                 "fluid_pp("//trim(iStr)//")%cv must be positive")
         end do
     end subroutine s_check_inputs_stiffened_eos
@@ -293,7 +293,7 @@ contains
 
         integer :: i
 
-        @:PROHIBIT(surface_tension .and. sigma < 0d0, &
+        @:PROHIBIT(surface_tension .and. sigma < 0._wp, &
             "sigma must be greater than or equal to zero")
 
         @:PROHIBIT(surface_tension .and. sigma == dflt_real, &
@@ -321,9 +321,9 @@ contains
         !! Called by s_check_inputs_common for all three stages
     subroutine s_check_inputs_moving_bc
         #:for X, VB2, VB3 in [('x', 'vb2', 'vb3'), ('y', 'vb3', 'vb1'), ('z', 'vb1', 'vb2')]
-            if (any((/bc_${X}$%vb1, bc_${X}$%vb2, bc_${X}$%vb3/) /= 0d0)) then
+            if (any((/bc_${X}$%vb1, bc_${X}$%vb2, bc_${X}$%vb3/) /= 0._wp)) then
                 if (bc_${X}$%beg == -15) then
-                    if (any((/bc_${X}$%${VB2}$, bc_${X}$%${VB3}$/) /= 0d0)) then
+                    if (any((/bc_${X}$%${VB2}$, bc_${X}$%${VB3}$/) /= 0._wp)) then
                         call s_mpi_abort("bc_${X}$%beg must be -15 if "// &
                                          "bc_${X}$%${VB2}$ or bc_${X}$%${VB3}$ "// &
                                          "is set. Exiting ...")
@@ -336,9 +336,9 @@ contains
         #:endfor
 
         #:for X, VE2, VE3 in [('x', 've2', 've3'), ('y', 've3', 've1'), ('z', 've1', 've2')]
-            if (any((/bc_${X}$%ve1, bc_${X}$%ve2, bc_${X}$%ve3/) /= 0d0)) then
+            if (any((/bc_${X}$%ve1, bc_${X}$%ve2, bc_${X}$%ve3/) /= 0._wp)) then
                 if (bc_${X}$%end == -15) then
-                    if (any((/bc_${X}$%${VE2}$, bc_${X}$%${VE3}$/) /= 0d0)) then
+                    if (any((/bc_${X}$%${VE2}$, bc_${X}$%${VE3}$/) /= 0._wp)) then
                         call s_mpi_abort("bc_${X}$%end must be -15 if "// &
                                          "bc_${X}$%${VE2}$ or bc_${X}$%${VE3}$ "// &
                                          "is set. Exiting ...")

diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp
@@ -4,13 +4,15 @@
 
 module m_constants
 
+    use m_precision_select
+
     character, parameter :: dflt_char = ' ' !< Default string value
 
-    real(kind(0d0)), parameter :: dflt_real = -1d6                !< Default real value
-    real(kind(0d0)), parameter :: sgm_eps = 1d-16               !< Segmentation tolerance
-    real(kind(0d0)), parameter :: small_alf = 1d-11                !< Small alf tolerance
-    real(kind(0d0)), parameter :: pi = 3.141592653589793d0 !< Pi
-    real(kind(0d0)), parameter :: verysmall = 1.d-12              !< Very small number
+    real(wp), parameter :: dflt_real = -1e6_wp                !< Default real value
+    real(wp), parameter :: sgm_eps = 1e-16_wp               !< Segmentation tolerance
+    real(wp), parameter :: small_alf = 1e-11_wp                !< Small alf tolerance
+    real(wp), parameter :: pi = 3.141592653589793_wp !< Pi
+    real(wp), parameter :: verysmall = 1.e-12_wp              !< Very small number
 
     integer, parameter :: num_stcls_min = 5    !< Minimum # of stencils
     integer, parameter :: path_len = 400  !< Maximum path length
@@ -22,21 +24,21 @@ module m_constants
     integer, parameter :: num_patches_max = 10
     integer, parameter :: pathlen_max = 400
     integer, parameter :: nnode = 4    !< Number of QBMM nodes
-    real(kind(0d0)), parameter :: capillary_cutoff = 1e-6 !< color function gradient magnitude at which to apply the surface tension fluxes
-    real(kind(0d0)), parameter :: acoustic_spatial_support_width = 2.5d0 !< Spatial support width of acoustic source, used in s_source_spatial
-    real(kind(0d0)), parameter :: dflt_vcfl_dt = 100d0 !< value of vcfl_dt when viscosity is off for computing adaptive timestep size
-    real(kind(0d0)), parameter :: broadband_spectral_level_constant = 20d0 !< The constant to scale the spectral level at the lower frequency bound
-    real(kind(0d0)), parameter :: broadband_spectral_level_growth_rate = 10d0 !< The spectral level constant to correct the magnitude at each frqeuency to ensure the source is overall broadband
+    real(wp), parameter :: capillary_cutoff = 1e-6_wp !< color function gradient magnitude at which to apply the surface tension fluxes
+    real(wp), parameter :: acoustic_spatial_support_width = 2.5_wp !< Spatial support width of acoustic source, used in s_source_spatial
+    real(wp), parameter :: dflt_vcfl_dt = 100._wp !< value of vcfl_dt when viscosity is off for computing adaptive timestep size
+    real(wp), parameter :: broadband_spectral_level_constant = 20._wp !< The constant to scale the spectral level at the lower frequency bound
+    real(wp), parameter :: broadband_spectral_level_growth_rate = 10._wp !< The spectral level constant to correct the magnitude at each frequency to ensure the source is overall broadband
 
     ! IBM+STL interpolation constants
     integer, parameter :: Ifactor_2D = 50 !< Multiple factor of the ratio (edge to cell width) for interpolation along edges for 2D models
     integer, parameter :: Ifactor_3D = 5 !< Multiple factor of the ratio (edge to cell width) for interpolation along edges for 3D models
     integer, parameter :: Ifactor_bary_3D = 20 !< Multiple factor of the ratio (triangle area to cell face area) for interpolation on triangle facets for 3D models
     integer, parameter :: num_ray = 20 !< Default number of rays traced per cell
-    real(kind(0d0)), parameter :: ray_tracing_threshold = 0.9d0 !< Threshold above which the cell is marked as the model patch
-    real(kind(0d0)), parameter :: threshold_vector_zero = 1d-10 !< Threshold to treat the component of a vector to be zero
-    real(kind(0d0)), parameter :: threshold_edge_zero = 1d-10 !< Threshold to treat two edges to be overlapped
-    real(kind(0d0)), parameter :: threshold_bary = 1d-1 !< Threshold to interpolate a barycentric facet
-    real(kind(0d0)), parameter :: initial_distance_buffer = 1d12 !< Initialized levelset distance for the shortest path pair algorithm
+    real(wp), parameter :: ray_tracing_threshold = 0.9_wp !< Threshold above which the cell is marked as the model patch
+    real(wp), parameter :: threshold_vector_zero = 1e-10_wp !< Threshold to treat the component of a vector to be zero
+    real(wp), parameter :: threshold_edge_zero = 1e-10_wp !< Threshold to treat two edges to be overlapped
+    real(wp), parameter :: threshold_bary = 1e-1_wp !< Threshold to interpolate a barycentric facet
+    real(wp), parameter :: initial_distance_buffer = 1e12_wp !< Initialized levelset distance for the shortest path pair algorithm
 
 end module m_constants
diff --git a/src/common/m_delay_file_access.f90 b/src/common/m_delay_file_access.f90
@@ -1,4 +1,5 @@
 module m_delay_file_access
+    use m_precision_select
     implicit none
     private
 
@@ -14,7 +15,7 @@ subroutine DelayFileAccess(ProcessRank)
         integer, intent(in) :: ProcessRank
 
         integer :: iDelay, nFileAccessDelayIterations
-        real(kind(0d0)) :: Number, Dummy
+        real(wp) :: Number, Dummy
 
         nFileAccessDelayIterations &
             = (ProcessRank/N_PROCESSES_FILE_ACCESS)*FILE_ACCESS_DELAY_UNIT