Skip to content

Commit

Permalink
polypeptide
Browse files Browse the repository at this point in the history
  • Loading branch information
nifets committed Apr 18, 2023
1 parent 8e5efb6 commit f5d6228
Show file tree
Hide file tree
Showing 7 changed files with 911 additions and 1,118 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
7 changes: 3 additions & 4 deletions src/distributions/EvolHMM.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


struct EvolHMM <: ContinuousMatrixDistribution
M::Integer # Total alignment length M = sum(lengths)
lengths::AbstractVector{Int}
Expand All @@ -10,12 +8,13 @@ struct EvolHMM <: ContinuousMatrixDistribution
α::Array{Real}
end

function PairHMM(n, m, A, sub, diff, t)
function EvolHMM(n, m, A, sub, diff, t)
α = Array{Real}(undef, n+2, m+2, 4)
return PairHMM(n, m, A, sub, diff, t, α)
end

function _logpdf(d::PairHMM, x::AbstractMatrix{<:Real})
function _logpdf(d::EvolHMM, x::AbstractMatrix{<:Real})

return forward!(d.α, x[1:3, 1:d.n], x[4:6, 1:d.m], d.A, d.sub, d.diff, d.t)
end

Expand Down
6 changes: 0 additions & 6 deletions src/distributions/SubstitutionModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,7 @@ using DelimitedFiles
# Q = S diag(Π) where S is symmetric exchangeability matrix,
# Π is equilibrium frequencies

aminoacids = "ARNDCQEGHILKMFPSTWYV-"

id_to_aa(i) = aminoacids[round(Int, i)]

aminoacid_ids = Dict((aminoacids[i], i) for i eachindex(aminoacids))

aa_to_id(a) = aminoacid_ids[a]

WAG_S = LowerTriangular(map((x) -> x=="" ? 0.0 : x,
readdlm("./data/params/WAG_S.csv"; skipblanks=false, dims=(20,20))))
Expand Down
50 changes: 50 additions & 0 deletions src/model/Polypeptide.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
using BioSequences
using BioStructures

aminoacids = "ARNDCQEGHILKMFPSTWYV-"
id_to_aa(i) = aminoacids[round(Int, i)]

aminoacid_ids = Dict((aminoacids[i], Float(i)) for i eachindex(aminoacids))
aa_to_id(a) = aminoacid_ids[a]

# Methods to extract internal coordinates data from BioStructures.Chain object
sequence(chain::Chain) = aa_to_id.(collect(string(LongAA(chain, standardselector))))
phi_angles(chain::Chain) = phiangles(chain, standardselector)
psi_angles(chain::Chain) = phiangles(chain, standardselector)
omega_angles(chain::Chain) = omegaangles(chain, standardselector)
calpha_coords(chain::Chain) = coordarray(chain, calphaselector)

# ▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
# Polypeptide

# Each column of data represents the internal coordinates of a residue
# The data in each row is specific by row_names
struct Polypeptide
data::AbstractMatrix{<:Real}
row_names::AbstractVector{String}
chain::BioStructures.Chain
end

# Construct Polypeptide from BioStructures.Chain
function Polypeptide(chain::Chain; primary=true,
phi=true, psi=true, omega=false,
cartesian=false)
rows = []
row_names = []

primary && (push!(rows, sequence(chain)); push!(row_names, "primary"))
phi && (push!(rows, phi_angles(chain)); push!(row_names, "ϕ angles"))
psi && (push!(rows, psi_angles(chain)); push!(row_names, "ψ angles"))
omega && (push!(rows, omega_angles(chain)); push!(row_names, "ω angles"))
cartesian && (push!(rows, calpha_coords(chain)); push!(row_names, "Cα x coords");
push!(row_names, "Cα y coords"); push!(row_names, "Cα z coords"))

data = hcat(rows...)'

return Polypeptide(data, row_names, chain)
end

num_residues(p::Polypeptide) = size(p.data, 2)
num_coords(p::Polypeptide) = size(p.data, 1)
data(p::Polypeptide) = p.data
chain(p::Polypeptide) = p.chain
41 changes: 0 additions & 41 deletions src/model/Protein.jl

This file was deleted.

2 changes: 1 addition & 1 deletion src/utils/Backbone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ function build_chain_from_alignment(chain::Chain, alignment, Y)

bond_angles_X, bond_lengths_X = backbone_bond_angles_and_lengths(chain)

#TODO write this more succintly lol
#TODO write this more succintly
bond_angles = Matrix{Real}(undef, 3, n)
for i in indicesIY
bond_angles[1, i] = rand(WrappedNormal(ideal_bond_angle["CA-C-N", aminoacids[i]]...))[1]
Expand Down
1,922 changes: 856 additions & 1,066 deletions test.ipynb

Large diffs are not rendered by default.

0 comments on commit f5d6228

Please sign in to comment.