10 changes: 10 additions & 0 deletions examples/FMS/Project.toml
@@ -0,0 +1,10 @@
[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
40 changes: 40 additions & 0 deletions examples/FMS/README.md
@@ -0,0 +1,40 @@


This example shows how to use EnsembleKalmanProcesses.jl to calibrate your model and quantify uncertainties in a massively parallel environment:
- Each EKP iteration requires evaluating the PDE solver `N_ens` times; these evaluations are embarrassingly parallel.
- Each PDE solver evaluation can itself be parallelized.


Here we provide two examples:
- a linear inverse problem
- an idealized GCM (https://github.com/szy21/fms_GCMForcing)


To run or add new examples:
1) Create your own `helper_funcs_XXXX.jl` file (see the sketch after this list), which includes
- `params_prefix` String: parameter estimates for each iteration are saved in `params_prefix*string(iteration_)*".jld"`.

- `input_prefix` String: EKP writes the input file for each PDE evaluation to `input_prefix*string(i)*"/"*"input_file"`, where `i` is the ensemble particle id. The input file name can be adjusted in your function `write_solver_input_files` in `helper_funcs_XXXX.jl`.

- `output_prefix` String: the PDE solver writes the output for each PDE evaluation to `output_prefix*string(i)*"/"*"output_file.jld"`, where `i` is the ensemble particle id. The output file name can be adjusted in your functions `write_solver_input_files` and `read_solver_output` in `helper_funcs_XXXX.jl`.

- `input_file_template` String: template of your PDE solver input file.
- `prior_mean` Array{Float64, 1}: prior mean.
- `prior_cov` Array{Float64, 2}: prior covariance.
- `N_params` Int64: number of parameters.
- `N_y` Int64: number of observations; it equals `N_obs + N_params`, since the system is augmented to include the prior information in EKP.
- `param_bounds` Vector{Vector}: lower and upper bounds for each parameter; when there is no bound, use `nothing`, e.g. `[[u1_low, nothing], [nothing, nothing]]`.
- `read_observation` Function: custom function that returns the observation mean and the observation noise covariance.
- `write_solver_input_files` Function: custom function that writes input files for the PDE solver for each ensemble particle.
- `read_solver_output` Function: custom function that reads the PDE prediction for the corresponding ensemble particle `ens_index`.
- `plot` Function: custom function to visualize the results.

2) Specify the number of particles `n` and the number of iterations `n_it` in `ekp_calibration.sbatch`.
3) The PDE solver is called from `ekp_solver_run.sbatch`; you need to modify this script for your solver.
4) Include `helper_funcs_XXXX.jl`.
5) Submit the job: `sbatch ekp_calibration.sbatch`.
6) The results will be saved in the `output` folder; you can use the `plot` function in `helper_funcs.jl` to visualize them.
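
For orientation, here is a minimal sketch of what a `helper_funcs_XXXX.jl` might contain. The concrete paths, problem sizes, and stub bodies are placeholder assumptions for illustration (the stub signatures are not the actual API); adapt them to your own solver:

```julia
using JLD            # save/load for .jld files
using LinearAlgebra  # identity matrix I

# Paths (placeholder values)
params_prefix = "output/params_"
input_prefix  = "output/output_"
output_prefix = "output/output_"
input_file_template = "input_file_template"

# Prior and problem sizes (placeholder values)
N_params   = 2
prior_mean = zeros(N_params)
prior_cov  = Matrix(1.0I, N_params, N_params)
N_obs      = 10
N_y        = N_obs + N_params                        # observations augmented with the prior
param_bounds = [[0.0, nothing], [nothing, nothing]]  # u1 >= 0, u2 unbounded

# Return the observation mean and observation noise covariance (stub)
function read_observation()
    return zeros(N_obs), Matrix(1.0I, N_obs, N_obs)
end

# Write one solver input file per ensemble particle (stub; signature is an assumption)
function write_solver_input_files(params_ens::Array{Float64, 2})
    for i in 1:size(params_ens, 2)
        # Fill input_file_template with params_ens[:, i] and write it to
        # input_prefix * string(i) * "/input_file"
    end
end

# Read the solver prediction for ensemble particle ens_index (stub; signature is an assumption)
function read_solver_output(ens_index::Int64)
    return load(output_prefix * string(ens_index) * "/output_file.jld")["output"]
end
```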



5 changes: 5 additions & 0 deletions examples/FMS/clean.sh
@@ -0,0 +1,5 @@
#!/bin/bash


rm -f slurm*
rm -f output/*/*
38 changes: 38 additions & 0 deletions examples/FMS/ekp_calibration.sbatch
@@ -0,0 +1,38 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "ekp_call" # job name
#SBATCH -o "slurm_ekp_calibration"

# Size of the ensemble
n=5
# Number of EK iterations
n_it=20

module purge
module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK:=1}
export JULIA_MPI_BINARY=system
export JULIA_CUDA_USE_BINARYBUILDER=false

# Instantiate and precompile the project serially before submitting jobs
julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.build()'
julia --project -e 'using Pkg; Pkg.precompile()'

# The initialization job creates the ensemble files from the priors and sets up the first prediction step
id_init_ens=$(sbatch --parsable ekp_init_calibration.sbatch)
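# Chain the jobs with Slurm afterok dependencies: the first solver array waits
# for the initialization job, later arrays wait for the previous EKP update,
# and each update waits for its entire solver array to succeed.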
for it in $(seq 1 1 $n_it)
do
# Parallel runs of forward model
if [ "$it" = "1" ]; then
id_ens_array=$(sbatch --parsable --kill-on-invalid-dep=yes --dependency=afterok:$id_init_ens --array=1-$n ekp_solver_run.sbatch $it)
else
id_ens_array=$(sbatch --parsable --kill-on-invalid-dep=yes --dependency=afterok:$id_ek_upd --array=1-$n ekp_solver_run.sbatch $it)
fi
id_ek_upd=$(sbatch --parsable --kill-on-invalid-dep=yes --dependency=afterok:$id_ens_array --export=n=$n ekp_cont_calibration.sbatch $it)
done

15 changes: 15 additions & 0 deletions examples/FMS/ekp_cont_calibration.sbatch
@@ -0,0 +1,15 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "ces_cont" # job name
#SBATCH -o output/slurm/cont_calibration.out

module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1
iteration_=${1?Error: no iteration given}

julia --project sstep_calibration.jl --iteration $iteration_
echo "Ensemble ${iteration_} recovery finished."

20 changes: 20 additions & 0 deletions examples/FMS/ekp_init_calibration.sbatch
@@ -0,0 +1,20 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "ekp_init" # job name
#SBATCH -o "slurm_ekp_init"


#julia package management
module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1

julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()'

julia --project preprocess.jl

julia --project init_calibration.jl
echo 'Ensemble initialized for calibration.'

23 changes: 23 additions & 0 deletions examples/FMS/ekp_solver_run.sbatch
@@ -0,0 +1,23 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "fms_ekp" # job name
#SBATCH -o output/slurm/array-%A_%a.out
#SBATCH -e output/slurm/array-%A_%a.err

module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1
iteration_=${1?Error: no iteration given}

run_num=${SLURM_ARRAY_TASK_ID}


solver="linear"
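# For the linear example, the per-member input file written by
# write_solver_input_files is run directly as a Julia script; the FMS
# branch is not implemented yet.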

if [ "$solver" = "linear" ]; then
julia output/output_$run_num/input_file
elif [ "$solver" = "fms" ]; then
echo "TODO"
fi
113 changes: 113 additions & 0 deletions examples/FMS/helper_funcs.jl
@@ -0,0 +1,113 @@
using NCDatasets
using Statistics
using Interpolations
using LinearAlgebra
using JLD
using EnsembleKalmanProcesses

"""
Transfer function
"""

include("helper_funcs_linear.jl")



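# Unscented Kalman inversion (UKI) settings: prior regularization strength
# α_reg, covariance update frequency, and a symmetric sigma-point ensemble
# of size 2 N_params + 1.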
α_reg = 1.0
update_freq = 1
sigma_points_type = "symmetric"
N_ens = 2N_params + 1


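# Map an unconstrained parameter u ∈ ℝ into its bounded range: one-sided
# bounds use an exponential transform, two-sided bounds a logistic-type
# transform, and unbounded parameters pass through unchanged.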
function constraint(u::Float64, u_low, u_up)
if(isnothing(u_low) && isnothing(u_up))
return u
elseif (isnothing(u_low) && !isnothing(u_up))
return u_up - exp(u)
elseif (!isnothing(u_low) && isnothing(u_up))
return u_low + exp(u)
else
return u_low + (u_up - u_low)/(1 + exp(u))
end
end

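# Derivative of the constraint map with respect to u; used to push
# covariances through the linearized transform.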
function dconstraint(u::Float64, u_low, u_up)
if(isnothing(u_low) && isnothing(u_up))
return 1.0
elseif (isnothing(u_low) && !isnothing(u_up))
return -exp(u)
elseif (!isnothing(u_low) && isnothing(u_up))
return exp(u)
else
return -(u_up - u_low)*exp(u)/(1 + exp(u))^2
end
end

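# Apply the scalar constraint map componentwise, using the global param_bounds.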
function constraint(u::Array{Float64, 1})
constraint_u = similar(u)
for (i, u_i) in enumerate(u)
constraint_u[i] = constraint(u_i, param_bounds[i][1], param_bounds[i][2])
end
return constraint_u
end

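# Push a covariance through the linearized constraint map (delta method):
# constrained covariance = dc * uu_cov * dc', with dc the diagonal matrix
# of dconstraint values at u_mean.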
function constraint(u_mean::Array{Float64, 1}, uu_cov::Array{Float64, 2})
N_params = length(u_mean)
dc = zeros(N_params, N_params)
for i = 1:N_params
dc[i,i] = dconstraint(u_mean[i], param_bounds[i][1], param_bounds[i][2])
end

constraint_uu_cov = dc * uu_cov * dc'
return constraint_uu_cov
end

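# Apply the constraint map column by column to an ensemble (one particle per column).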
function constraint(u_ens::Array{Float64, 2})
constraint_u_ens = similar(u_ens)
N_params, N_ens = size(u_ens)

for i = 1:N_ens
constraint_u_ens[:, i] = constraint(u_ens[:, i])
end

return constraint_u_ens
end


# save mean and covariance and u_ens
function save_params(ukiobj, iteration_::Int64)
mean, cov = ukiobj.process.u_mean[end], ukiobj.process.uu_cov[end]
u_ens = EnsembleKalmanProcesses.construct_sigma_ensemble(ukiobj.process, mean, cov)
u_p_ens = get_u_final(ukiobj)

save(params_prefix*string(iteration_)*".jld", "mean", mean, "cov", cov, "u_ens", u_ens, "u_p_ens", u_p_ens)
end


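# Load the saved mean, covariance, sigma-point ensemble, and propagated ensemble for one iteration.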
function read_params(iteration_::Int64)
data = load(params_prefix*string(iteration_)*".jld")
return data["mean"], data["cov"], data["u_ens"], data["u_p_ens"]
end


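# Collect constrained means and covariances across iterations 0..N_iterations
# (iteration 0 corresponds to the prior).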
function read_all_params(N_iterations::Int64)
constraint_u_mean_all = zeros(N_params, N_iterations + 1)
constraint_uu_cov_all = zeros(N_params, N_params, N_iterations + 1)

for iteration_ = 0:N_iterations
data = load(params_prefix*string(iteration_)*".jld")
u_mean, uu_cov = data["mean"], data["cov"]

constraint_u_mean_all[:, iteration_+1] = constraint(u_mean)
constraint_uu_cov_all[:, :, iteration_+1] = constraint(u_mean, uu_cov)

end

return constraint_u_mean_all, constraint_uu_cov_all
end
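# Example usage (hypothetical iteration count):
#   means, covs = read_all_params(20)
#   means[:, end]   # constrained parameter means after the final iteration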


