Skip to content

Commit

Permalink
trajectory reconstruction
Browse files Browse the repository at this point in the history
  • Loading branch information
nifets committed May 19, 2023
1 parent 975632b commit e1a355c
Show file tree
Hide file tree
Showing 16 changed files with 30,395 additions and 84 deletions.
5,268 changes: 5,268 additions & 0 deletions .ipynb_checkpoints/Globins Experiment-checkpoint.ipynb

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions .ipynb_checkpoints/Pair Parameter Inference-checkpoint.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1492,17 +1492,19 @@
"using TimerOutputs\n",
"\n",
"@model function pair_param_inference_simple(pairs)\n",
" N = length(pairs)\n",
" # ____________________________________________________________________________________________________\n",
" # Step 1 - Sample prior parameters\n",
" \n",
" # Time parameter\n",
" t ~ Exponential(1.0) \n",
" # Time paramter for each pair\n",
" t ~ filldist(Exponential(1.0), N)\n",
" \n",
" # Alignment parameters\n",
" @submodel prefix=\"τ\" Λ = tkf92_prior()\n",
" # Dihedral parameters \n",
" @submodel prefix=\"Θ\" Ξ = jwndiff_prior()\n",
" # Check parameter validity \n",
" if t ≤ 0 || any(isnan.(Ξ)) || any(isnan.(Λ))\n",
" if any(t .≤ 0) || any(isnan.(Ξ)) || any(isnan.(Λ))\n",
" Turing.@addlogprob! -Inf; return\n",
" end\n",
" \n",
Expand Down
1,159 changes: 1,110 additions & 49 deletions .ipynb_checkpoints/Simulated Data Experiments-checkpoint.ipynb

Large diffs are not rendered by default.

5,232 changes: 5,232 additions & 0 deletions Globins Experiment.ipynb

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions Pair Parameter Inference.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1492,17 +1492,19 @@
"using TimerOutputs\n",
"\n",
"@model function pair_param_inference_simple(pairs)\n",
" N = length(pairs)\n",
" # ____________________________________________________________________________________________________\n",
" # Step 1 - Sample prior parameters\n",
" \n",
" # Time parameter\n",
" t ~ Exponential(1.0) \n",
" # Time paramter for each pair\n",
" t ~ filldist(Exponential(1.0), N)\n",
" \n",
" # Alignment parameters\n",
" @submodel prefix=\"τ\" Λ = tkf92_prior()\n",
" # Dihedral parameters \n",
" @submodel prefix=\"Θ\" Ξ = jwndiff_prior()\n",
" # Check parameter validity \n",
" if t ≤ 0 || any(isnan.(Ξ)) || any(isnan.(Λ))\n",
" if any(t .≤ 0) || any(isnan.(Ξ)) || any(isnan.(Λ))\n",
" Turing.@addlogprob! -Inf; return\n",
" end\n",
" \n",
Expand All @@ -1520,7 +1522,7 @@
" τ = TKF92([t], Λ...)\n",
" \n",
" # Chain level model\n",
" Γ = ChainJointDistribution(ξ, τ)\n",
" Γ = ChainJointDistribution(ξ, TKF92([t[i]], Λ...))\n",
" \n",
" # ____________________________________________________________________________________________________\n",
" # Step 3 - Observe each pair X, Y by proxy of their joint probability, marginalising over alignments\n",
Expand Down
5,163 changes: 5,163 additions & 0 deletions data/pdb/1A9W.pdb

Large diffs are not rendered by default.

3,328 changes: 3,328 additions & 0 deletions data/pdb/1I3D.pdb

Large diffs are not rendered by default.

6,901 changes: 6,901 additions & 0 deletions data/pdb/2W31.pdb

Large diffs are not rendered by default.

3,181 changes: 3,181 additions & 0 deletions data/pdb/3VRF.pdb

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion src/TorusEvol.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ export

# utils/Backbone
build_biochain_from_aminoacids_dihedrals,
build_biochain_from_aminoacids_dihedrals_alignment,
build_biochain_from_triple_alignment,
id_to_aa,
aa_to_id,
aminoacids,
Expand All @@ -22,12 +24,15 @@ export
Polypeptide,
from_pdb,
from_file,
to_file,
num_residues,
num_coords,
data,
chain,
render,
from_triple_alignment,
from_observed_chain,
from_aligned_polypeptide,
from_primary_dihedrals,

# objects/Chains
Expand All @@ -51,6 +56,7 @@ export
row_index,
subalignment,
combine,
glue,
mask,
show_filled_alignment,

Expand Down Expand Up @@ -169,7 +175,8 @@ export
logpdfs,
sample_anc_alignment,
ancestor_sampling,
trajectory_reconstruction
trajectory_reconstruction,
write_trajectory


# Objects
Expand Down
38 changes: 32 additions & 6 deletions src/inference/AncestorSampling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ end
function ancestor_sampling(M_YZ::Alignment,
Y::ObservedChain, Z::ObservedChain,
t::Real, ξ::MixtureProductProcess, Λ)
@info "reconstructing ancestor...\n"
C = num_coords(ξ)
E = num_regimes(ξ)

Expand Down Expand Up @@ -193,7 +194,7 @@ function trajectory_reconstruction(M_YZ::Alignment, Y::ObservedChain, Z::Observe
# ______________________________________________________________________________
# Base case
if levels == 0
return [Y, Z]
return [Y, Z], M_YZ
end

# ______________________________________________________________________________
Expand All @@ -204,11 +205,36 @@ function trajectory_reconstruction(M_YZ::Alignment, Y::ObservedChain, Z::Observe
X, M_XYZ = ancestor_sampling(M_YZ, Y, Z, t, ξ, Λ)

# 2. Reconstruct trajectories recursively on each branch
M_YX = subalignment(M_XYZ, [2, 1])
traj_YX = trajectory_reconstruction(M_YX, Y, X, ξ, t/2, Λ; levels=levels-1)
M_YX = subalignment(M_XYZ, [2, 1])
traj_YX, M_Y_toX = trajectory_reconstruction(M_YX, Y, X, ξ, t/2, Λ; levels=levels-1)
M_XZ = subalignment(M_XYZ, [1, 3])
traj_XZ = trajectory_reconstruction(M_XZ, X, Z, ξ, t/2, Λ; levels=levels-1)
traj_XZ, M_X_toZ = trajectory_reconstruction(M_XZ, X, Z, ξ, t/2, Λ; levels=levels-1)

# Combine the trajectories
return [traj_YX; traj_XZ[2:end]]
# Combine the trajectories and alignments
traj = [traj_YX; traj_XZ[2:end]]
M = glue(M_Y_toX, M_X_toZ)
return traj, M
end

function write_trajectory(chains::AbstractVector{ObservedChain},
M::Alignment,
poly_Y::Polypeptide,
poly_Z::Polypeptide,
name::String)
N = length(chains)

alignment = Alignment(data(M), collect(1:N))
polypeptides = Vector{Polypeptide}(undef, N)
polypeptides[1] = poly_Y
polypeptides[N] = poly_Z
for i 2:(N-1)
X = chains[i]
M_XYZ = subalignment(alignment, [i, 1, N])
polypeptides[i] = from_triple_alignment(poly_Y, poly_Z, X, M_XYZ)
end

names = [name * "_$i" for i 1:N]
to_file.(polypeptides, names)

return polypeptides
end
4 changes: 2 additions & 2 deletions src/inference/Priors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ end;

@model function jwndiff_prior()
μ ~ filldist(Uniform(-π, π), 2)
σ² ~ filldist(Gamma(π * 0.1), 2)
α ~ filldist(Gamma(π * 0.1), 2)
σ² ~ filldist(Gamma(1.1), 2)
α ~ filldist(Gamma(1.1), 2)
γ ~ Exponential(1.0) # jumping rate
α_corr ~ ScaledBeta(3, 3)

Expand Down
10 changes: 5 additions & 5 deletions src/inference/Proposals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ mv_rw_proposal(v::AbstractVector, cov) = MvNormal(v, cov)
rw_proposal(x, var) = Normal(x, var)

Θ_samplers = [MH(Symbol("Θ.μ") => v -> torus_proposal(v)),
MH(Symbol("Θ.σ²") => v -> mv_rw_proposal(v, 0.4*I)),
MH(Symbol("Θ.α") => v -> mv_rw_proposal(v, 0.4*I)),
MH(Symbol("Θ.α_corr") => x -> rw_proposal(x, 0.5)),
MH(Symbol("Θ.σ²") => v -> mv_rw_proposal(v, 0.1*I)),
MH(Symbol("Θ.α") => v -> mv_rw_proposal(v, 0.1*I)),
MH(Symbol("Θ.α_corr") => x -> rw_proposal(x, 0.05)),
MH(Symbol("Θ.γ") => x -> rw_proposal(x, 0.5))]

τ_samplers = [MH(Symbol("τ.λμ") => v -> mv_rw_proposal(v, [0.4 0.1; 0.1 0.6])),
MH(Symbol("τ.r") => x -> rw_proposal(x, 0.5))]
τ_samplers = [MH(Symbol("τ.λμ") => v -> mv_rw_proposal(v, [0.2 0.02; 0.02 0.2])),
MH(Symbol("τ.r") => x -> rw_proposal(x, 0.2))]
15 changes: 15 additions & 0 deletions src/objects/Alignment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,18 @@ function combine(parent_id::Int, a1::Alignment, a2::Alignment) :: Alignment
return Alignment(hcat(columns...), new_ids)

end


function glue(a1::Alignment, a2::Alignment) :: Alignment
n = num_sequences(a1)
m = num_sequences(a2)
ids1 = [collect(2:n); 1]
ids2 = 10000000 .+ collect(1:m); ids2[1] = 1
x = Alignment(data(a1), ids1)
y = Alignment(data(a2), ids2)
M = combine(1, x, y)


good_data = data(M)[[collect(2:n); 1; collect((n+1):(n+m-1))], :]
return Alignment(good_data)
end
123 changes: 111 additions & 12 deletions src/objects/Backbone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,24 @@ using Rotations

const BioChain = BioStructures.Chain

ccmod(x) = rem(x,2π, RoundNearest)

function average_length(a, b)
(a + b) / 2
end

function average_angle(a, b)
if a > b
tmp = a; a = b; b = tmp
end
d1 = b - a
d2 = a + 2π - b
if d1 < d2
return a + d1/2
else
return ccmod(b + d2/2)
end
end

aminoacids = "ARNDCQEGHILKMFPSTWYV"
id_to_aa(i) = aminoacids[i]
Expand Down Expand Up @@ -114,8 +131,13 @@ function build_biochain_from_aminoacids_dihedrals(aminoacid_ids::AbstractArray{<
return build_chain_from_internals(id, aminoacids, torsion_angles, bond_angles, bond_lengths)
end

function build_biochain_from_alignment(chain::BioChain,
alignment::Alignment)
function build_biochain_from_aminoacids_dihedrals_alignment(chain::BioChain,
M_XY::Alignment,
aminoacid_ids::AbstractArray{<:Integer},
dihedrals::AbstractMatrix{<:Real};
id="X")
alignment = Alignment(data(M_XY))

maskX = mask(alignment, [[1], [0,1]])
maskY = mask(alignment, [[0,1], [1]])

Expand All @@ -127,37 +149,114 @@ function build_biochain_from_alignment(chain::BioChain,
match_maskX = mask(alignmentX, [[1], [1]])
match_maskY = mask(alignmentY, [[1], [1]])

aminoacids = String(id_to_aa.(get_coord(p, "aminoacid")))
aminoacids = String(id_to_aa.(vec(aminoacid_ids)))
@assert length(aminoacids) == N

torsion_angles = Matrix{Real}(undef, 3, n)
dihedrals = get_coord(p, "ramachadran angles")
torsion_angles[1, 2:end] = dihedrals[2, 1:(end-1)] # ψ
torsion_angles[3, 2:end] = dihedrals[1, 2:end] # ϕ
@assert size(dihedrals, 2) == N
torsion_angles = Matrix{Float64}(undef, 3, N)
torsion_angles[1, 2:end] = dihedrals[2, 1:(end-1)] # ψ
torsion_angles[3, 2:end] = dihedrals[1, 2:end] # ϕ
# ω
torsion_angles[2, insert_maskY] .= rand(Normal(ideal_omega_angle...), count(insert_maskY))
torsion_angles[2, match_maskY] .= omegaangles(chain, standardselector)[match_maskX]

bond_angles_X, bond_lengths_X = backbone_bond_angles_and_lengths(chain)

#TODO write this more succintly
bond_angles = Matrix{Real}(undef, 3, n)
for i in collect(1:n)[insert_maskY]
bond_angles = Matrix{Real}(undef, 3, N)
for i in collect(1:N)[insert_maskY]
bond_angles[1, i] = rand(WrappedNormal(ideal_bond_angle["CA-C-N", aminoacids[i]]...))[1]
bond_angles[2, i] = rand(WrappedNormal(ideal_bond_angle["C-N-CA", aminoacids[i]]...))[1]
bond_angles[3, i] = rand(WrappedNormal(ideal_bond_angle["N-CA-C", aminoacids[i]]...))[1]
end
bond_angles[:, match_maskY] .= bond_angles_X[:, match_maskX]

bond_lengths = Matrix{Real}(undef, 3, n)
for i in collect(1:n)[insert_maskY]
bond_lengths = Matrix{Real}(undef, 3, N)
for i in collect(1:N)[insert_maskY]
bond_lengths[1, i] = abs(rand(Normal(ideal_bond_length["C-N", aminoacids[i]]...)))
bond_lengths[2, i] = abs(rand(Normal(ideal_bond_length["N-CA", aminoacids[i]]...)))
bond_lengths[3, i] = abs(rand(Normal(ideal_bond_length["CA-C", aminoacids[i]]...)))
end
bond_lengths[:, match_maskY] .= bond_lengths_X[:, match_maskX]

return build_chain_from_internals("Y", aminoacids, torsion_angles, bond_angles, bond_lengths)
return build_chain_from_internals(id, aminoacids, torsion_angles, bond_angles, bond_lengths)
end

function build_biochain_from_triple_alignment(chainY::BioChain,
chainZ::BioChain,
M_XYZ::Alignment,
aminoacid_ids::AbstractArray{<:Integer},
dihedrals::AbstractMatrix{<:Real};
id="X")
alignment = Alignment(data(M_XYZ))
X_mask = mask(alignment, [[1], [0,1], [0,1]])
alignmentX = slice(alignment, X_mask)
Y_mask = mask(alignment, [[0,1], [1], [0,1]])
alignmentY = slice(alignment, Y_mask)
Z_mask = mask(alignment, [[0,1], [0,1], [1]])
alignmentZ = slice(alignment, Z_mask)


X_maskX = mask(alignmentX, [[1], [0], [0]])

XY_maskX = mask(alignmentX, [[1], [1], [0]])
XY_maskY = mask(alignmentY, [[1], [1], [0]])

XZ_maskX = mask(alignmentX, [[1], [0], [1]])
XZ_maskZ = mask(alignmentZ, [[1], [0], [1]])

XYZ_maskX = mask(alignmentX, [[1], [1], [1]])
XYZ_maskY = mask(alignmentY, [[1], [1], [1]])
XYZ_maskZ = mask(alignmentZ, [[1], [1], [1]])

N = count(X_mask)

aminoacids = String(id_to_aa.(vec(aminoacid_ids)))
@assert length(aminoacids) == N

@assert size(dihedrals, 2) == N
torsion_angles = Matrix{Float64}(undef, 3, N)
torsion_angles[1, 2:end] = dihedrals[2, 1:(end-1)] # ψ
torsion_angles[3, 2:end] = dihedrals[1, 2:end] # ϕ
# ω
torsion_angles[2, X_maskX] .= rand(Normal(ideal_omega_angle...), count(X_maskX))
torsion_angles[2, XY_maskX] .= omegaangles(chainY, standardselector)[XY_maskY]
torsion_angles[2, XZ_maskX] .= omegaangles(chainZ, standardselector)[XZ_maskZ]
torsion_angles[2, XYZ_maskX] .= average_angle.(omegaangles(chainY, standardselector)[XYZ_maskY],
omegaangles(chainZ, standardselector)[XYZ_maskZ])

bond_angles_Y, bond_lengths_Y = backbone_bond_angles_and_lengths(chainY)
bond_angles_Z, bond_lengths_Z = backbone_bond_angles_and_lengths(chainZ)

#TODO write this more succintly
bond_angles = Matrix{Real}(undef, 3, N)
for i in collect(1:N)[X_maskX]
bond_angles[1, i] = rand(WrappedNormal(ideal_bond_angle["CA-C-N", aminoacids[i]]...))[1]
bond_angles[2, i] = rand(WrappedNormal(ideal_bond_angle["C-N-CA", aminoacids[i]]...))[1]
bond_angles[3, i] = rand(WrappedNormal(ideal_bond_angle["N-CA-C", aminoacids[i]]...))[1]
end
bond_angles[:, XY_maskX] .= bond_angles_Y[:, XY_maskY]
bond_angles[:, XZ_maskX] .= bond_angles_Z[:, XZ_maskZ]
bond_angles[:, XYZ_maskX] .= average_angle.(bond_angles_Y[:, XYZ_maskY],
bond_angles_Z[:, XYZ_maskZ])


bond_lengths = Matrix{Real}(undef, 3, N)
for i in collect(1:N)[X_maskX]
bond_lengths[1, i] = abs(rand(Normal(ideal_bond_length["C-N", aminoacids[i]]...)))
bond_lengths[2, i] = abs(rand(Normal(ideal_bond_length["N-CA", aminoacids[i]]...)))
bond_lengths[3, i] = abs(rand(Normal(ideal_bond_length["CA-C", aminoacids[i]]...)))
end
bond_lengths[:, XY_maskX] .= bond_lengths_Y[:, XY_maskY]
bond_lengths[:, XZ_maskX] .= bond_lengths_Z[:, XZ_maskZ]
bond_lengths[:, XYZ_maskX] .= average_length.(bond_lengths_Y[:, XYZ_maskY],
bond_lengths_Z[:, XYZ_maskZ])

return build_chain_from_internals(id, aminoacids, torsion_angles, bond_angles, bond_lengths)
end



function build_chain_from_internals(id::String, aminoacids::String,
torsion_angles::Matrix{<:Real}, # ψ ω ϕ
bond_angles::Matrix{<:Real}, # CA-C-N C-N-CA N-CA-C
Expand Down
Loading

0 comments on commit e1a355c

Please sign in to comment.