Skip to content

Commit

Permalink
turing testing
Browse files Browse the repository at this point in the history
  • Loading branch information
nifets committed Apr 17, 2023
1 parent 6ecf3d8 commit 83e277a
Show file tree
Hide file tree
Showing 16 changed files with 19,770 additions and 429 deletions.
640 changes: 245 additions & 395 deletions Manifest.toml

Large diffs are not rendered by default.

24 changes: 2 additions & 22 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,43 +1,23 @@
name = "TorusEvol"
uuid = "bb045c0a-7050-477d-931f-b94f09181689"
authors = ["Stefan Manolache <[email protected]>"]
version = "0.1.0"

[deps]
Bio3DView = "99c8bb3a-9d13-5280-9740-b4880ed9c598"
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
Blink = "ad839575-38b3-5650-b840-f874b8c74a25"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
ExponentialAction = "e24c0720-ea99-47e8-929e-571b494574d3"
FastaIO = "a0c94c4b-ebed-5953-b5fc-82fe598ac79f"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
RandomMatrices = "2576dda1-a324-5b11-aa66-c48ed7e3c618"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Rotations = "6038ab10-8711-5258-84ad-4b1120ba62dc"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
SubstitutionModels = "8365b1bb-bd83-58ee-a267-f2965fc81c73"
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
UpdateJulia = "770da0de-323d-4d28-9202-0e205c1e0aff"
WebIO = "0f1e0344-ec1d-5b48-a673-e5cf874b6c29"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
1,817 changes: 1,817 additions & 0 deletions data/pdb/1MBN.pdb

Large diffs are not rendered by default.

3,294 changes: 3,294 additions & 0 deletions data/pdb/1UMO.pdb

Large diffs are not rendered by default.

3,065 changes: 3,065 additions & 0 deletions data/pdb/2DHB.pdb

Large diffs are not rendered by default.

5,477 changes: 5,477 additions & 0 deletions data/pdb/3MKB.pdb

Large diffs are not rendered by default.

3,129 changes: 3,129 additions & 0 deletions data/pdb/4ZVA.pdb

Large diffs are not rendered by default.

1,709 changes: 1,709 additions & 0 deletions data/pdb/7ZOS.pdb

Large diffs are not rendered by default.

Binary file added persistence.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
52 changes: 41 additions & 11 deletions src/PairEvol.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ Turing.setrdcache(true)

sub = WAG_SubstitutionProcess


# Sample structure parameters
# todo more principles prior definition
# todo more principled prior definition
mean ~ filldist(Uniform(-π, π), 2)
var ~ filldist(Exponential(1), 4)
# α_corr ~ truncated(Normal(0, 1), -1, 1)
Expand Down Expand Up @@ -61,6 +62,12 @@ function sample_posterior(chainX, chainY, l, n_samples)
alg = HMC(0.05, 10, :t, :mean, :var, , :r, , :seq_length)

chain = sample(model, alg, n_samples)

# Index the chain with the persistence probabilities.
subchain = chain[["t", "r", "seq_length"]]

plt = plot(subchain; seriestype=:traceplot, title="Persistence Probability", legend=false)
savefig(plt, "persistence.png")
return chain
end

Expand Down Expand Up @@ -140,8 +147,8 @@ end
function backward_sampling(α, A)
n_x, n_y = size(α) .- 2
states = 1:4
logp = logsumexp([α[n_x, n_y, q] + A[q, END] for q in states])
s = rand(Categorical(exp.([α[n_x, n_y, q] + A[q, END] - logp for q in states])))
logp = logsumexp([α[n_x+2, n_y+2, q] + A[q, END] for q in states])
s = rand(Categorical(exp.([α[n_x+2, n_y+2, q] + A[q, END] - logp for q in states])))

i = n_x
j = n_y
Expand All @@ -154,10 +161,10 @@ function backward_sampling(α, A)
new_i = s [MATCH, DELETE] ? (i-1) : i
new_j = s [MATCH, INSERT] ? (j-1) : j

lpS = A[START, s] + α[new_i, new_j, START] - α[i, j, s]
lpM = A[MATCH, s] + α[new_i, new_j, MATCH] - α[i, j, s]
lpD = A[DELETE, s] + α[new_i, new_j, DELETE] - α[i, j, s]
lpI = A[INSERT, s] + α[new_i, new_j, INSERT] - α[i, j, s]
lpS = A[START, s] + α[new_i+2, new_j+2, START] - α[i+2, j+2, s]
lpM = A[MATCH, s] + α[new_i+2, new_j+2, MATCH] - α[i+2, j+2, s]
lpD = A[DELETE, s] + α[new_i+2, new_j+2, DELETE] - α[i+2, j+2, s]
lpI = A[INSERT, s] + α[new_i+2, new_j+2, INSERT] - α[i+2, j+2, s]

i = new_i
j = new_j
Expand All @@ -180,6 +187,27 @@ function filled_alignment(alignment, X, Y)
return filled_alignment
end

function indel_angle_change(filled)
num_indels = 0
num = 0
indel_angle = [0, 0]
angle = [0, 0]
j = -1
for i in axes(filled, 2)
if filled[1, i] == '-' || filled[4, i] == '-'
if j >= 1
indel_angle += abs.(filled[2:3, j] .- filled[5:6, j])
end
num_indels += 1
else
angle += abs.(filled[2:3, i] .- filled[5:6, i])
j = i
num += 1
end
end
return indel_angle / num_indels, angle / num
end

function pair_align(chainX, chainY; t=0.1)
sub = WAG_SubstitutionProcess

Expand Down Expand Up @@ -210,17 +238,19 @@ function pair_align(chainX, chainY; t=0.1)
n_y = size(Y, 2)
states = 1:4

α = OffsetArray{Real}(undef, -1:n_x, -1:n_y, states)
α = Array{Real}(undef, n_x+2, n_y+2, 4)
forward!(α, X, Y, A, sub, diff, t)
println("Alignments sampled from TKF92-WAG_WrappedDiff evol model")
for _ in 1:10
alignment = backward_sampling(α, A)

filled = filled_alignment(alignment, X, Y)
seqX, seqY = string(filled[1, :]...), string(filled[4, :]...)
println(seqX)
println(seqY)
println()
display(filled)
display(indel_angle_change(filled))
#println(seqX)
#println(seqY)
#println()
end
return
end
Empty file.
12 changes: 12 additions & 0 deletions src/distributions/Processes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@ end
statdist(p::ProductProcess) = arraydist(statdist.(p.processes))
transdist(p::ProductProcess, t::Real, x₀) = arraydist(transdist.(p.procceses, Ref(t), Ref(x₀)))

# __________________________________________________________________________________________
# Mixture of processes
struct MixtureProcess{D <: Distribution} <: AbstractProcess{D}
weights::AbstractVector{Real}
processes::AbstractVector{AbstractProcess{D}}
end

statdist(p::ProductProcess) = MixtureModel(statdist.(p.processes), p.weights)
transdist(p::ProductProcess, t::Real, x₀) = MixtureModel(transdist.(p.procceses, Ref(t), Ref(x₀)),
p.weights)


# __________________________________________________________________________________________
# Jumping process - returns to stationary distribution at some rate γ
struct JumpingProcess
Expand Down
2 changes: 1 addition & 1 deletion src/distributions/SubstitutionModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ using DelimitedFiles
# Q = S diag(Π) where S is symmetric exchangeability matrix,
# Π is equilibrium frequencies

aminoacids = "ARNDCQEGHILKMFPSTWYV"
aminoacids = "ARNDCQEGHILKMFPSTWYV-"

id_to_aa(i) = aminoacids[round(Int, i)]

Expand Down
99 changes: 99 additions & 0 deletions src/experiments/IndelAngleChanges.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
using BioSymbols
include("PairEvol.jl")

function ang_dist(ϕ_x, ϕ_y, ψ_x, ψ_y)
d = sqrt(4 - 2*cos(ϕ_x - ϕ_y) - 2*cos(ψ_x - ψ_y))
return d
end

function indel_angle_changes(chainX, chainY)
X = vcat(reshape(sequence(chainX), 1, :), angles(chainX))
X[isnan.(X)] .= 0.0
Y = vcat(reshape(sequence(chainY), 1, :), angles(chainY))
Y[isnan.(Y)] .= 0.0

println("Alignment generated by affine gap combinatorial model")
scoremodel = AffineGapScoreModel(BLOSUM62, gap_open=-10, gap_extend=-1);
aln = alignment(pairalign(GlobalAlignment(), LongAA(chainX, standardselector),
LongAA(chainY, standardselector), scoremodel))

my_aln = []
for (x, y) in aln
if x === AA_Gap
push!(my_aln, INSERT)
elseif y === AA_Gap
push!(my_aln, DELETE)
else
push!(my_aln, MATCH)
end
end

filled = filled_alignment(my_aln, X, Y)
angle_dist = Array{Real}(undef, length(aln))
indel_dist = similar(angle_dist)
matches = zeros(length(angle_dist))
angle_dist[1] = 0
indel_dist[1] = 100000000
angle_dist[end] = 0
indel_dist[end] = 100000000
for i in axes(filled, 2)[2:(end-1)]
#indel
if filled[1, i] == '-' || filled[4, i] == '-'
angle_dist[i] = 0
indel_dist[i] = 0
else
if filled[1, i] == filled[4, i]
matches[i] = 1
end
angle_dist[i] = ang_dist(filled[2, i], filled[3, i], filled[5, i], filled[6,i])
indel_dist[i] = indel_dist[i-1] + 1
end
end

for i in reverse(axes(filled, 2))[2:(end-1)]
#match
if !(filled[1, i] == '-' || filled[4, i] == '-')
indel_dist[i] = min(indel_dist[i], indel_dist[i+1] + 1)
end
end
indel_dist = indel_dist[2:(end-1)]
angle_dist = angle_dist[2:(end-1)]
matches = matches[2:(end-1)]

mask1 = filter(==(1), indel_dist)
angle1 = mean(angle_dist[mask1])
mask2 = filter(==(2), indel_dist)
angle2 = mean(angle_dist[mask2])
mask3 = filter(==(3), indel_dist)
angle3 = mean(angle_dist[mask3])
maskn = filter(x -> x [0,1,2,3], indel_dist)
anglen = mean(angle_dist[maskn])

maskmatch = filter(i -> indel_dist[i] >= 1 && matches[i] == 1, eachindex(matches))
masksub = filter(i -> indel_dist[i] >= 1 && matches[i] == 0, eachindex(matches))
anglematch = mean(angle_dist[maskmatch])
anglesub = mean(angle_dist[masksub])

return [angle1, angle2, angle3, anglen, anglematch, anglesub]
end

using Combinatorics, DataFrames, PrettyTables

function experiment()
human = retrievepdb("1A3N", dir="data/pdb")
hA, hB = human["A"], human["B"]
cy = retrievepdb("1UMO", dir="data/pdb")
my = retrievepdb("1MBN", dir="data/pdb")["A"]
cyA, cyB = cy["A"], cy["B"]
e = retrievepdb("4ZVA", dir="data/pdb")["A"]
pairs = [[hA, hB], [hA, cyB], [hB, cyB], [cyA, hA], [hA, my], [hB, my], [my, cyA],
[hA, e]]
res = vcat(map(v-> indel_angle_changes(v...)', pairs)...)
df = DataFrame(res, ["1 res from indel", "2 res from indel",
"3 res from indel", "far away from indels",
"match", "substitution"])

println("Angular changes during insertion/deletion events")
pretty_table(df, nosubheader=true)

end
878 changes: 878 additions & 0 deletions test.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
using Test

0 comments on commit 83e277a

Please sign in to comment.