10 changes: 10 additions & 0 deletions examples/FMS/Project.toml
@@ -0,0 +1,10 @@
[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d"
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
40 changes: 40 additions & 0 deletions examples/FMS/README.md
@@ -0,0 +1,40 @@


This example shows how to use EnsembleKalmanProcesses.jl to calibrate your model and quantify uncertainties in a massively parallel environment:
- Each EKP iteration requires evaluating the PDE solver `N_ens` times; these evaluations are embarrassingly parallel.
- Each PDE solver evaluation can itself be parallelized.


Here we provide two examples:
- a linear inverse problem
- an idealized GCM (https://github.com/szy21/fms_GCMForcing)


To run or add new examples:
1) Create your own `helper_funcs_XXXX.jl` file (see the sketch after this list), which includes
- `params_prefix` String: parameter estimates for each iteration are saved in `params_prefix*string(iteration_)*".jld"`.

- `input_prefix` String: EKP writes the input file for each PDE evaluation to `input_prefix*string(i)*"/"*"input_file"`, where `i` is the ensemble particle id. The input file name can be adjusted in your function `write_solver_input_files` in `helper_funcs_XXXX.jl`.

- `output_prefix` String: the PDE solver writes the output for each PDE evaluation to `output_prefix*string(i)*"/"*"output_file.jld"`, where `i` is the ensemble particle id. The output file name can be adjusted in your functions `write_solver_input_files` and `read_solver_output` in `helper_funcs_XXXX.jl`.

- `input_file_template` String: template of your PDE solver input file.
- `prior_mean` Array{Float64, 1}: prior mean.
- `prior_cov` Array{Float64, 2}: prior covariance.
- `N_params` Int64: number of parameters.
- `N_y` Int64: number of observations; it equals `N_obs + N_params`, since the system is augmented to include the prior information in EKP.
- `param_bounds` Vector{Vector}: lower and upper bounds for each parameter; when there is no bound, use `nothing`, e.g. `[[u1_low, nothing], [nothing, nothing]]`.
- `read_observation` Function: custom function that returns the observation mean and the observation noise covariance.
- `write_solver_input_files` Function: custom function that writes input files for the PDE solver for each ensemble particle.
- `read_solver_output` Function: custom function that reads the PDE prediction for the corresponding ensemble particle `ens_index`.
- `plot` Function: custom function to visualize the results.

2) Specify the number of particles `n` and the number of iterations `n_it` in `ekp_calibration.sbatch`.
3) The PDE solver is called from `ekp_solver_run.sbatch`; you need to modify this script for your solver.
4) Include `helper_funcs_XXXX.jl`.
5) Submit the job: `sbatch ekp_calibration.sbatch`.
6) The results will be saved in the `output` folder; you can use the `plot` function in `helper_funcs.jl` to visualize them.
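
For orientation, here is a minimal sketch of what a `helper_funcs_XXXX.jl` might contain. The concrete paths, problem sizes, and stub bodies are placeholder assumptions for illustration (the stub signatures are not the actual API); adapt them to your own solver:

```julia
using JLD            # save/load for .jld files
using LinearAlgebra  # identity matrix I

# Paths (placeholder values)
params_prefix = "output/params_"
input_prefix  = "output/output_"
output_prefix = "output/output_"
input_file_template = "input_file_template"

# Prior and problem sizes (placeholder values)
N_params   = 2
prior_mean = zeros(N_params)
prior_cov  = Matrix(1.0I, N_params, N_params)
N_obs      = 10
N_y        = N_obs + N_params                        # observations augmented with the prior
param_bounds = [[0.0, nothing], [nothing, nothing]]  # u1 >= 0, u2 unbounded

# Return the observation mean and observation noise covariance (stub)
function read_observation()
    return zeros(N_obs), Matrix(1.0I, N_obs, N_obs)
end

# Write one solver input file per ensemble particle (stub; signature is an assumption)
function write_solver_input_files(params_ens::Array{Float64, 2})
    for i in 1:size(params_ens, 2)
        # Fill input_file_template with params_ens[:, i] and write it to
        # input_prefix * string(i) * "/input_file"
    end
end

# Read the solver prediction for ensemble particle ens_index (stub; signature is an assumption)
function read_solver_output(ens_index::Int64)
    return load(output_prefix * string(ens_index) * "/output_file.jld")["output"]
end
```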



5 changes: 5 additions & 0 deletions examples/FMS/clean.sh
@@ -0,0 +1,5 @@
#!/bin/bash


rm -f slurm*
rm -f output/*/*
38 changes: 38 additions & 0 deletions examples/FMS/ekp_calibration.sbatch
@@ -0,0 +1,38 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "ekp_call" # job name
#SBATCH -o "slurm_ekp_calibration"

# Size of the ensemble
n=5
# Number of EK iterations
n_it=20

module purge
module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1

export JULIA_NUM_THREADS=${SLURM_CPUS_PER_TASK:=1}
export JULIA_MPI_BINARY=system
export JULIA_CUDA_USE_BINARYBUILDER=false

# Instantiate and precompile the project serially before submitting jobs
julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.build()'
julia --project -e 'using Pkg; Pkg.precompile()'

# The initialization job creates the ensemble files from the priors and sets up the first prediction step
id_init_ens=$(sbatch --parsable ekp_init_calibration.sbatch)
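# Chain the jobs with Slurm afterok dependencies: the first solver array waits
# for the initialization job, later arrays wait for the previous EKP update,
# and each update waits for its entire solver array to succeed.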
for it in $(seq 1 1 $n_it)
do
# Parallel runs of forward model
if [ "$it" = "1" ]; then
id_ens_array=$(sbatch --parsable --kill-on-invalid-dep=yes --dependency=afterok:$id_init_ens --array=1-$n ekp_solver_run.sbatch $it)
else
id_ens_array=$(sbatch --parsable --kill-on-invalid-dep=yes --dependency=afterok:$id_ek_upd --array=1-$n ekp_solver_run.sbatch $it)
fi
id_ek_upd=$(sbatch --parsable --kill-on-invalid-dep=yes --dependency=afterok:$id_ens_array --export=n=$n ekp_cont_calibration.sbatch $it)
done

15 changes: 15 additions & 0 deletions examples/FMS/ekp_cont_calibration.sbatch
@@ -0,0 +1,15 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "ces_cont" # job name
#SBATCH -o output/slurm/cont_calibration.out

module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1
iteration_=${1?Error: no iteration given}

julia --project sstep_calibration.jl --iteration $iteration_
echo "Ensemble ${iteration_} recovery finished."

20 changes: 20 additions & 0 deletions examples/FMS/ekp_init_calibration.sbatch
@@ -0,0 +1,20 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "ekp_init" # job name
#SBATCH -o "slurm_ekp_init"


#julia package management
module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1

julia --project -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()'

julia --project preprocess.jl

julia --project init_calibration.jl
echo 'Ensemble initialized for calibration.'

23 changes: 23 additions & 0 deletions examples/FMS/ekp_solver_run.sbatch
@@ -0,0 +1,23 @@
#!/bin/bash

#SBATCH --time=1:00:00 # walltime
#SBATCH --ntasks=1 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --mem-per-cpu=6G # memory per CPU core
#SBATCH -J "fms_ekp" # job name
#SBATCH -o output/slurm/array-%A_%a.out
#SBATCH -e output/slurm/array-%A_%a.err

module load julia/1.8.2 hdf5/1.10.1 netcdf-c/4.6.1 openmpi/4.0.1
iteration_=${1?Error: no iteration given}

run_num=${SLURM_ARRAY_TASK_ID}


solver="linear"
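# For the linear example, the per-member input file written by
# write_solver_input_files is run directly as a Julia script; the FMS
# branch is not implemented yet.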

if [ "$solver" = "linear" ]; then
julia output/output_$run_num/input_file
elif [ "$solver" = "fms" ]; then
echo "TODO"
fi
113 changes: 113 additions & 0 deletions examples/FMS/helper_funcs.jl
@@ -0,0 +1,113 @@
using NCDatasets
using Statistics
using Interpolations
using LinearAlgebra
using JLD
using EnsembleKalmanProcesses

"""
Transfer function
"""

include("helper_funcs_linear.jl")



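# Unscented Kalman inversion (UKI) settings: prior regularization strength
# α_reg, covariance update frequency, and a symmetric sigma-point ensemble
# of size 2 N_params + 1.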
α_reg = 1.0
update_freq = 1
sigma_points_type = "symmetric"
N_ens = 2N_params + 1


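# Map an unconstrained parameter u ∈ ℝ into its bounded range: one-sided
# bounds use an exponential transform, two-sided bounds a logistic-type
# transform, and unbounded parameters pass through unchanged.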
function constraint(u::Float64, u_low, u_up)
if(isnothing(u_low) && isnothing(u_up))
return u
elseif (isnothing(u_low) && !isnothing(u_up))
return u_up - exp(u)
elseif (!isnothing(u_low) && isnothing(u_up))
return u_low + exp(u)
else
return u_low + (u_up - u_low)/(1 + exp(u))
end
end

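# Derivative of the constraint map with respect to u; used to push
# covariances through the linearized transform.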
function dconstraint(u::Float64, u_low, u_up)
if(isnothing(u_low) && isnothing(u_up))
return 1.0
elseif (isnothing(u_low) && !isnothing(u_up))
return -exp(u)
elseif (!isnothing(u_low) && isnothing(u_up))
return exp(u)
else
return -(u_up - u_low)*exp(u)/(1 + exp(u))^2
end
end

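# Apply the scalar constraint map componentwise, using the global param_bounds.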
function constraint(u::Array{Float64, 1})
constraint_u = similar(u)
for (i, u_i) in enumerate(u)
constraint_u[i] = constraint(u_i, param_bounds[i][1], param_bounds[i][2])
end
return constraint_u
end

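# Push a covariance through the linearized constraint map (delta method):
# constrained covariance = dc * uu_cov * dc', with dc the diagonal matrix
# of dconstraint values at u_mean.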
function constraint(u_mean::Array{Float64, 1}, uu_cov::Array{Float64, 2})
N_params = length(u_mean)
dc = zeros(N_params, N_params)
for i = 1:N_params
dc[i,i] = dconstraint(u_mean[i], param_bounds[i][1], param_bounds[i][2])
end

constraint_uu_cov = dc * uu_cov * dc'
return constraint_uu_cov
end

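# Apply the constraint map column by column to an ensemble (one particle per column).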
function constraint(u_ens::Array{Float64, 2})
constraint_u_ens = similar(u_ens)
N_params, N_ens = size(u_ens)

for i = 1:N_ens
constraint_u_ens[:, i] = constraint(u_ens[:, i])
end

return constraint_u_ens
end


# save mean and covariance and u_ens
function save_params(ukiobj, iteration_::Int64)
mean, cov = ukiobj.process.u_mean[end], ukiobj.process.uu_cov[end]
u_ens = EnsembleKalmanProcesses.construct_sigma_ensemble(ukiobj.process, mean, cov)
u_p_ens = get_u_final(ukiobj)

save(params_prefix*string(iteration_)*".jld", "mean", mean, "cov", cov, "u_ens", u_ens, "u_p_ens", u_p_ens)
end


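# Load the saved mean, covariance, sigma-point ensemble, and propagated ensemble for one iteration.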
function read_params(iteration_::Int64)
data = load(params_prefix*string(iteration_)*".jld")
return data["mean"], data["cov"], data["u_ens"], data["u_p_ens"]
end


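# Collect constrained means and covariances across iterations 0..N_iterations
# (iteration 0 corresponds to the prior).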
function read_all_params(N_iterations::Int64)
constraint_u_mean_all = zeros(N_params, N_iterations + 1)
constraint_uu_cov_all = zeros(N_params, N_params, N_iterations + 1)

for iteration_ = 0:N_iterations
data = load(params_prefix*string(iteration_)*".jld")
u_mean, uu_cov = data["mean"], data["cov"]

constraint_u_mean_all[:, iteration_+1] = constraint(u_mean)
constraint_uu_cov_all[:, :, iteration_+1] = constraint(u_mean, uu_cov)

end

return constraint_u_mean_all, constraint_uu_cov_all
end
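# Example usage (hypothetical iteration count):
#   means, covs = read_all_params(20)
#   means[:, end]   # constrained parameter means after the final iteration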


