fix mpi solution

JBlaschke · JBlaschke · commit d43b32839087 · 2024-11-17T06:56:37.000-08:00
diff --git a/.gitignore b/.gitignore
@@ -29,3 +29,6 @@ Manifest.toml
 activate.sh
 deactivate.sh
 ext/GrayScott.jl
+
+
+*.jld2
diff --git a/Manifest.toml b/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.10.4"
 manifest_format = "2.0"
-project_hash = "fc814033df7bb2362d0b2cbe216991051dd7475f"
+project_hash = "f95ad848b2db86117d9a28403e2805e9e12ad741"
 
 [[deps.AbstractFFTs]]
 deps = ["LinearAlgebra"]
@@ -839,6 +839,12 @@ git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
 uuid = "82899510-4779-5014-852e-03e436cf321d"
 version = "1.0.0"
 
+[[deps.JLD2]]
+deps = ["FileIO", "MacroTools", "Mmap", "OrderedCollections", "PrecompileTools", "Requires", "TranscodingStreams"]
+git-tree-sha1 = "ce5737c0d4490b0e0040b5dc77fbb6a351ddf188"
+uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+version = "0.5.8"
+
 [[deps.JLFzf]]
 deps = ["Pipe", "REPL", "Random", "fzf_jll"]
 git-tree-sha1 = "39d64b09147620f5ffbf6b2d3255be3c901bec63"
diff --git a/Project.toml b/Project.toml
@@ -6,6 +6,7 @@ ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
diff --git a/parts/mpi/diffusion_2d_mpi.jl b/parts/mpi/diffusion_2d_mpi.jl
@@ -2,7 +2,7 @@
 using Printf
 using JLD2
 using MPI
-include(joinpath(@__DIR__, "../shared.jl"))
+include(joinpath(@__DIR__, "shared.jl"))
 
 # convenience macros simply to avoid writing nested finite-difference expression
 macro qx(ix, iy) esc(:(-D * (C[$ix+1, $iy] - C[$ix, $iy]) / dx)) end
diff --git a/parts/mpi/shared.jl b/parts/mpi/shared.jl
@@ -0,0 +1,118 @@
+## PARAMETER INITIALIZATION
+"""
+    init_params(; ns=64, nt=100, kwargs...)
+
+Assemble the parameter named tuple for the single-process 2D diffusion run.
+
+# Keywords
+- `ns`: number of grid points per dimension.
+- `nt`: number of time steps (stored unchanged).
+- `kwargs...`: any further keywords are appended verbatim to the result.
+
+# Returns
+Named tuple `(; L, D, ns, nt, ds, dt, cs, nout, kwargs...)`.
+"""
+function init_params(; ns=64, nt=100, kwargs...)
+    L, D = 10.0, 1.0   # physical domain length, diffusion coefficient
+    ds   = L / ns      # grid spacing
+    half = ds / 2      # offset of the first/last coordinate point from the domain edge
+    # coordinate points, shifted by half the domain length so they are centred on 0
+    cs   = range(start=half, stop=L - half, length=ns) .- 0.5 * L
+    return (;
+        L, D, ns, nt, ds,
+        dt   = ds^2 / D / 8.2,     # time step
+        cs,
+        nout = floor(Int, nt / 5), # plotting frequency
+        kwargs...
+    )
+end
+
+"""
+    init_params_mpi(; dims, coords, ns=64, nt=100, kwargs...)
+
+Assemble the parameter named tuple for one process of the MPI-decomposed 2D diffusion run.
+
+# Keywords
+- `dims`: number of processes along each grid dimension (read as `dims[1]`, `dims[2]`).
+- `coords`: position of this process in the process grid (read as `coords[1]`, `coords[2]`).
+  The offsets `x0`/`y0` are `coords[i] * (ns - 2) * spacing`, so the coordinates are
+  presumably 0-based — confirm against the caller.
+- `ns`: local number of grid points per dimension; the global count is `dims[i] * (ns - 2) + 2`.
+- `nt`: number of time steps (stored unchanged).
+- `kwargs...`: any further keywords are appended verbatim to the result.
+
+# Returns
+Named tuple `(; L, D, ns, nt, dx, dy, dt, xcs, ycs, kwargs...)`, where `xcs`/`ycs` are the
+global coordinates of the local grid points, shifted by `L / 2`.
+"""
+function init_params_mpi(; dims, coords, ns=64, nt=100, kwargs...)
+    L    = 10.0                      # physical domain length
+    D    = 1.0                       # diffusion coefficient
+    nx_g = dims[1] * (ns - 2) + 2    # global number of grid points along dim 1
+    ny_g = dims[2] * (ns - 2) + 2    # global number of grid points along dim 2
+    dx   = L / nx_g                  # grid spacing along dim 1
+    dy   = L / ny_g                  # grid spacing along dim 2
+    dt   = min(dx, dy)^2 / D / 8.2   # time step, limited by the finer of the two spacings
+    x0   = coords[1] * (ns - 2) * dx # coords shift to get global coords on local process
+    y0   = coords[2] * (ns - 2) * dy # coords shift to get global coords on local process
+    # local vectors of global coord points
+    xcs  = LinRange(x0 + dx / 2, x0 + ns * dx - dx / 2, ns) .- 0.5 .* L
+    # The y extent must be measured in dy, not dx: the two spacings differ whenever
+    # dims[1] != dims[2], and using dx there would stretch or squash the local y axis.
+    ycs  = LinRange(y0 + dy / 2, y0 + ns * dy - dy / 2, ns) .- 0.5 .* L
+    return (; L, D, ns, nt, dx, dy, dt, xcs, ycs, kwargs...)
+end
+
+"""
+    init_params_gpu(; ns=64, nt=100, kwargs...)
+
+Assemble the parameter named tuple for the single-process GPU diffusion run.
+
+Computes the same quantities as the CPU parameter set and adds the kernel launch
+configuration: `nthreads` (threads per block) and `nblocks` (blocks needed to cover `ns`
+points with that many threads, rounded up).
+
+# Returns
+Named tuple `(; L, D, ns, nt, ds, dt, cs, nout, nthreads, nblocks, kwargs...)`.
+"""
+function init_params_gpu(; ns=64, nt=100, kwargs...)
+    L, D = 10.0, 1.0               # physical domain length, diffusion coefficient
+    ds   = L / ns                  # grid spacing
+    dt   = ds^2 / D / 8.2          # time step
+    half = ds / 2
+    # coordinate points, shifted by half the domain length
+    cs   = range(start=half, stop=L - half, length=ns) .- 0.5 * L
+    nout = floor(Int, nt / 5)      # plotting frequency
+    nthreads = (32, 8)             # number of threads per block
+    # number of blocks: ceiling division of ns by each thread count
+    nblocks  = map(n -> cld(ns, n), nthreads)
+    return (; L, D, ns, nt, ds, dt, cs, nout, nthreads, nblocks, kwargs...)
+end
+
+"""
+    init_params_gpu_mpi(; dims, coords, ns=64, nt=100, kwargs...)
+
+Assemble the parameter named tuple for one process of the MPI-decomposed GPU diffusion run.
+
+# Keywords
+- `dims`: number of processes along each grid dimension (read as `dims[1]`, `dims[2]`).
+- `coords`: position of this process in the process grid (read as `coords[1]`, `coords[2]`).
+- `ns`: local number of grid points per dimension.
+- `nt`: number of time steps (stored unchanged).
+- `kwargs...`: any further keywords are appended verbatim to the result.
+
+# Returns
+Named tuple `(; L, D, ns, nt, dx, dy, dt, xcs, ycs, nthreads, nblocks, kwargs...)`.
+"""
+function init_params_gpu_mpi(; dims, coords, ns=64, nt=100, kwargs...)
+    L, D     = 10.0, 1.0             # physical domain length, diffusion coefficient
+    interior = ns - 2                # local points that are not shared with a neighbour
+    nx_g     = dims[1] * interior + 2 # global number of grid points along dim 1
+    ny_g     = dims[2] * interior + 2 # global number of grid points along dim 2
+    dx, dy   = L / nx_g, L / ny_g    # grid spacing along dim 1 and dim 2
+    dt       = min(dx, dy)^2 / D / 8.2 # time step
+    # shift from local to global coordinates for this process
+    x0 = coords[1] * interior * dx
+    y0 = coords[2] * interior * dy
+    # local vectors of global coord points, shifted by half the domain length
+    shift = 0.5 * L
+    xcs = LinRange(x0 + dx / 2, x0 + ns * dx - dx / 2, ns) .- shift
+    ycs = LinRange(y0 + dy / 2, y0 + ns * dy - dy / 2, ns) .- shift
+    nthreads = (32, 8)               # number of threads per block
+    nblocks  = map(n -> cld(ns, n), nthreads) # number of blocks (ceiling division)
+    return (; L, D, ns, nt, dx, dy, dt, xcs, ycs, nthreads, nblocks, kwargs...)
+end
+
+## ARRAY INITIALIZATION
+"""
+    init_arrays_with_flux(params)
+
+Create the initial field `C` and zero-filled flux arrays from `params.cs` and `params.ns`.
+
+`C[i, j]` is `exp(-cs[i]^2 - cs[j]^2)`. `qx` has size `(ns - 1, ns - 2)` and `qy` has size
+`(ns - 2, ns - 1)`. Returns `(C, qx, qy)`.
+"""
+function init_arrays_with_flux(params)
+    cs = params.cs
+    ns = params.ns
+    gaussian(x, y) = exp(-x^2 - y^2)
+    # broadcasting a column against its adjoint (a row) yields the full 2D field
+    C  = gaussian.(cs, cs')
+    qx = zeros(ns - 1, ns - 2)
+    qy = zeros(ns - 2, ns - 1)
+    return C, qx, qy
+end
+
+"""
+    init_arrays(params)
+
+Create the initial field `C` from `params.cs`, with `C[i, j] = exp(-cs[i]^2 - cs[j]^2)`,
+and an independent copy `C2`. Returns `(C, C2)`.
+"""
+function init_arrays(params)
+    cs = params.cs
+    gaussian(x, y) = exp(-x^2 - y^2)
+    # broadcasting a column against its adjoint (a row) yields the full 2D field
+    C = gaussian.(cs, cs')
+    return C, copy(C)
+end
+
+"""
+    init_arrays_mpi(params)
+
+Create the local initial field `C` from `params.xcs` and `params.ycs`, with
+`C[i, j] = exp(-xcs[i]^2 - ycs[j]^2)`, and an independent copy `C2`. Returns `(C, C2)`.
+"""
+function init_arrays_mpi(params)
+    xcs = params.xcs
+    ycs = params.ycs
+    gaussian(x, y) = exp(-x^2 - y^2)
+    # x varies along the first dimension, y (as an adjoint row) along the second
+    C = gaussian.(xcs, ycs')
+    return C, copy(C)
+end
+
+"""
+    init_arrays_gpu(params)
+
+Evaluate the initial field `exp(-cs[i]^2 - cs[j]^2)` from `params.cs`, wrap the result in
+a `CuArray`, and make an independent copy of it. Returns `(C, C2)`.
+
+`CuArray` is not defined in this file; it must be in scope where this file is included.
+"""
+function init_arrays_gpu(params)
+    cs = params.cs
+    gaussian(x, y) = exp(-x^2 - y^2)
+    # the field is evaluated first and then handed to the CuArray constructor
+    C_host = gaussian.(cs, cs')
+    C = CuArray(C_host)
+    return C, copy(C)
+end
+
+"""
+    init_arrays_gpu_mpi(params)
+
+Evaluate the local initial field `exp(-xcs[i]^2 - ycs[j]^2)` from `params.xcs` and
+`params.ycs`, wrap the result in a `CuArray`, and make an independent copy of it.
+Returns `(C, C2)`.
+
+`CuArray` is not defined in this file; it must be in scope where this file is included.
+"""
+function init_arrays_gpu_mpi(params)
+    xcs = params.xcs
+    ycs = params.ycs
+    gaussian(x, y) = exp(-x^2 - y^2)
+    # the field is evaluated first and then handed to the CuArray constructor
+    C_host = gaussian.(xcs, ycs')
+    C = CuArray(C_host)
+    return C, copy(C)
+end
+
+## VISUALIZATION & PRINTING
+"""
+    maybe_init_visualization(params, C)
+
+When `params.do_visualize` is true, build a figure with a heatmap of `C` over
+`params.cs` × `params.cs` plus a colorbar, display it, and return `(fig, plt)`.
+Otherwise return `(nothing, nothing)`.
+
+`Figure`, `Axis`, `DataAspect`, `heatmap!` and `Colorbar` are not defined in this file;
+they are presumably provided by a Makie backend loaded by the including script.
+"""
+function maybe_init_visualization(params, C)
+    # nothing to set up when visualization is switched off
+    params.do_visualize || return (nothing, nothing)
+
+    fig = Figure(; size=(500, 400), fontsize=14)
+    ax  = Axis(fig[1, 1][1, 1]; aspect=DataAspect(), title="C")
+    # Array(C) converts the field to a plain Array before plotting
+    plt = heatmap!(ax, params.cs, params.cs, Array(C); colormap=:turbo, colorrange=(0, 1))
+    Colorbar(fig[1, 1][1, 2], plt)
+    display(fig)
+    return fig, plt
+end
+
+"""
+    maybe_update_visualization(params, fig, plt, C, it)
+
+When `params.do_visualize` is true and iteration `it` is a multiple of `params.nout`,
+assign `Array(C)` to `plt[3]` and re-display `fig`. Always returns `nothing`.
+"""
+function maybe_update_visualization(params, fig, plt, C, it)
+    # && short-circuits, so params.nout is only read when visualization is enabled
+    refresh = params.do_visualize && (it % params.nout == 0)
+    if refresh
+        plt[3] = Array(C)
+        display(fig)
+    end
+    return nothing
+end
+
+"""
+    print_perf(params, t_toc)
+
+Print the elapsed time `t_toc` and an effective memory throughput `T_eff` in GB/s,
+computed from `params.ns` and `params.nt`. Returns `nothing`.
+
+`@printf` comes from the `Printf` standard library, which must be loaded by the
+including script.
+"""
+function print_perf(params, t_toc)
+    ns = params.ns
+    nt = params.nt
+    # data volume per iteration in GB; the factor 2 presumably counts one read and one
+    # write of the ns × ns Float64 field — confirm
+    gb_per_it = 2 / 1e9 * ns^2 * sizeof(Float64)
+    # time per iteration; the `- 10` presumably discounts 10 warm-up iterations excluded
+    # from the timing by the caller — confirm
+    t_it  = t_toc / (nt - 10)
+    T_eff = round(gb_per_it / t_it, sigdigits=6)
+    @printf("Time = %1.4e s, T_eff = %1.2f GB/s \n", t_toc, T_eff)
+    return nothing
+end
diff --git a/parts/mpi/solution/diffusion_2d_mpi.jl b/parts/mpi/solution/diffusion_2d_mpi.jl
@@ -2,7 +2,7 @@
 using Printf
 using JLD2
 using MPI
-include(joinpath(@__DIR__, "../../shared.jl"))
+include(joinpath(@__DIR__, "../shared.jl"))
 
 # convenience macros simply to avoid writing nested finite-difference expression
 macro qx(ix, iy) esc(:(-D * (C[$ix+1, $iy] - C[$ix, $iy]) / dx)) end
diff --git a/parts/mpi/solution/slurm_mpi_multinode.out b/parts/mpi/solution/slurm_mpi_multinode.out
@@ -0,0 +1,2 @@
+nprocs = 16, dims = (4, 4)
+Time = 1.3349e-02 s, T_eff = 7.07 GB/s 
diff --git a/parts/mpi/solution/slurm_mpi_singlenode.out b/parts/mpi/solution/slurm_mpi_singlenode.out
@@ -1,2 +1,10 @@
-nprocs = 4, dims = [2, 2]
-Time = 1.2309e-02 s, T_eff = 7.67 GB/s 
+nprocs = 4, dims = (2, 2)
+Time = 1.2562e-02 s, T_eff = 7.51 GB/s 
+┌ Warning: Opening file with JLD2.MmapIO failed, falling back to IOStream
+└ @ JLD2 /pscratch/sd/b/blaschke/depot/packages/JLD2/KyKLQ/src/JLD2.jl:153
+┌ Warning: Opening file with JLD2.MmapIO failed, falling back to IOStream
+└ @ JLD2 /pscratch/sd/b/blaschke/depot/packages/JLD2/KyKLQ/src/JLD2.jl:153
+┌ Warning: Opening file with JLD2.MmapIO failed, falling back to IOStream
+└ @ JLD2 /pscratch/sd/b/blaschke/depot/packages/JLD2/KyKLQ/src/JLD2.jl:153
+┌ Warning: Opening file with JLD2.MmapIO failed, falling back to IOStream
+└ @ JLD2 /pscratch/sd/b/blaschke/depot/packages/JLD2/KyKLQ/src/JLD2.jl:153
diff --git a/parts/mpi/visualize_mpi.ipynb b/parts/mpi/visualize_mpi.ipynb

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+nprocs = 16, dims = (4, 4)`
	`2`	`+Time = 1.3349e-02 s, T_eff = 7.07 GB/s`