Skip to content

Commit

Permalink
feat: support custom modifications; see #75
Browse files Browse the repository at this point in the history
  • Loading branch information
sgibb committed Mar 15, 2021
1 parent fdff39a commit 69fb485
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 7 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# topdownr 1.13
- New version for Bioc 3.13 (devel)

## Changes in version 1.13.1
- `as(..., "NCBSet")` now treats neutral losses and modifications as bonds as
well.
- `readTopDownFiles` gains a new argument `customModifications` to allow
user-defined modifications. Suggestion and first implementation by
Maša Babović <[email protected]> [2021-03-15].

# topdownr 1.12
- New version for Bioc 3.12 (release)

Expand Down
78 changes: 76 additions & 2 deletions R/fragments.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,54 @@
x
}

#' Add custom modifications to the output of MSnbase::calculateFragments
#'
#' Validates the user-supplied modification table, translates the textual
#' locations "N-term"/"C-term" (case-insensitive) into the positions `0` and
#' `n + 1`, and applies the modifications one row at a time (first row first)
#' via `.cusmod()`.
#'
#' @param x `data.frame`, output of [MSnbase::calculateFragments()].
#' @param n `numeric(1)`, length of sequence.
#' @param modifications `data.frame`, with 4 columns mass, name, location,
#' variable. A `data.frame` without rows leaves `x` untouched.
#' @return `data.frame`
#' @noRd
.addCustomModifications <- function(x, n, modifications) {
    if (!nrow(modifications)) {
        return(x)
    }

    if (!all(c("mass", "name", "location", "variable") %in%
             colnames(modifications))) {
        stop(
            "The 'customModifications' data.frame must have the columns: ",
            "'mass', 'name', 'location' and 'variable'."
        )
    }

    location <- modifications$location

    ## a factor can not be compared against fragment positions, treat it like
    ## the character labels it was created from
    if (is.factor(location)) {
        location <- as.character(location)
    }

    if (is.character(location)) {
        label <- tolower(location)
        location[label %in% "n-term"] <- 0L
        location[label %in% "c-term"] <- n + 1L
        ## the coercion warning is replaced by the explicit error below
        location <- suppressWarnings(as.integer(location))
    }

    ## an NA location would otherwise fail deep inside .cusmod with an
    ## unrelated subscript error
    invalid <- is.na(location)
    if (any(invalid)) {
        stop(
            "Invalid 'location' in 'customModifications' row(s): ",
            paste(which(invalid), collapse=", "),
            ". Use \"N-term\", \"C-term\" or the position of the amino acid."
        )
    }

    ## later rows are applied to the result of the earlier ones
    for (i in seq_len(nrow(modifications))) {
        x <- .cusmod(
            x, n=n,
            id=modifications$name[i],
            deltamz=modifications$mass[i],
            location=location[i],
            variable=modifications$variable[i]
        )
    }
    x
}

#' Calculate Fragments (via MSnbase::calculateFragments)
#'
#' @param sequence `AAString`, peptide sequence
#' @param type `character`, type of fragments
#' @param modification `character`, unimod names
#' @param neutralLoss `list`, neutral loss (see [MSnbase::calculateFragments()])
#' @param adducts `data.frame`, with 3 columns mass, name, to
#' @param neutralLoss `list`, neutral loss (see [MSnbase::calculateFragments()])
#' @param sequenceOrder `character`, `c("original", "random", "inverse")`
#' @param verbose `logical`, verbose output?
#' @return `FragmentViews`
Expand All @@ -43,8 +84,9 @@
"Carbamidomethyl", "Acetyl",
"Met-loss"
),
neutralLoss=defaultNeutralLoss(),
customModifications=data.frame(),
adducts=data.frame(),
neutralLoss=defaultNeutralLoss(),
sequenceOrder=c(
"original",
"random",
Expand Down Expand Up @@ -103,6 +145,8 @@
if (all(!nchar(modifications))) {
modifications <- NULL
}
## TODO: replace by unimod package
d <- .addCustomModifications(d, nchar(csequence), customModifications)
d <- .addAdducts(d, adducts)

## remove protein sequence from data.frame (just added to calculate
Expand Down Expand Up @@ -273,3 +317,33 @@
.unimod765 <- function(x) {
    ## drop a leading "M" only if it is directly followed by one of
    ## A, C, G, P, S, T or V; the pattern is anchored, so at most one
    ## replacement per element can happen
    sub(pattern="^M([ACGPSTV])", replacement="\\1", x=x)
}

#' Apply a single custom modification to a fragment table
#'
#' TODO: replace by unimod package
#'
#' A fragment is affected if it contains the modified position: fragments
#' whose type starts with a, b or c need `location <= pos`, fragments whose
#' type starts with x, y or z need `location > n - pos`. Affected fragments
#' get `deltamz` added to `mz` and the suffix `_m<id>` appended to `ion` and
#' `type`.
#'
#' @param x `data.frame`, generated by [MSnbase::calculateFragments()]; the
#' columns `type`, `pos`, `mz` and `ion` are used.
#' @param n `integer(1)`, length of peptide sequence.
#' @param id `character(1)`, name of the modification (used in the suffix).
#' @param deltamz `numeric(1)`, value added to `mz` of affected fragments.
#' @param location `integer(1)`, 0 for "N-term", n+1 for "C-term" or index.
#' @param variable `logical(1)`, if TRUE the unmodified fragments are kept and
#' the modified ones are appended, otherwise the affected fragments are
#' replaced by their modified version.
#' @return modified `data.frame`
#' @noRd
.cusmod <- function(x, n, id, deltamz, location, variable) {
    fromNterm <- grepl("^a|^b|^c", x$type)
    fromCterm <- grepl("^x|^y|^z", x$type)

    ## fragments that cover the modified position
    affected <- (fromNterm & location <= x$pos) |
        (fromCterm & location > n - x$pos)

    modified <- x
    modified$mz[affected] <- modified$mz[affected] + deltamz
    modified$ion[affected] <- paste0(modified$ion[affected], "_m", id)
    modified$type[affected] <- paste0(modified$type[affected], "_m", id)

    if (!variable) {
        return(modified)
    }

    ## variable modification: keep the originals, append the modified rows
    rbind(x, modified[affected, ], make.row.names = FALSE)
}
31 changes: 27 additions & 4 deletions R/functions-TopDownSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,23 @@
#' * `.experiments.csv` (method/fragmentation conditions)
#' * `.txt` (scan header information)
#'
#' `customModifications`: in addition to the unimod modifications
#' available through the `modifications` argument, `customModifications` allows
#' applying user-defined modifications to the output of
#' [`MSnbase::calculateFragments()`][MSnbase::calculateFragments-methods].
#' The `customModifications` argument takes a
#' `data.frame` with the `mass` to add, the `name` of the modification, the
#' `location` (either the position of the amino acid or "N-term"/"C-term"),
#' and `variable`, which states whether the modification is always present
#' (`variable=FALSE`) or whether both the modified and the unmodified amino
#' acid are present (`variable=TRUE`), e.g.
#' for Acetylation (which is also available via `modifications="Acetyl"`)
#' `data.frame(mass=42.010565, name="Acetyl", location="N-term", variable=FALSE)`
#' or a variable one (that could be present or not):
#' `data.frame(mass=365.132, name="Custom", location=10, variable=TRUE)`
#'
#' If the `customModifications` `data.frame` contains multiple rows, the
#' modifications are applied one at a time, from the first row to the last.
#'
#' `adducts`: *Thermo's Xtract*
#' allows some mistakes in deisotoping, mostly it
#' allows `+/- C13-C12` and `+/- H+`.
Expand Down Expand Up @@ -53,6 +70,8 @@
#' Use `NULL` to disable all modifications.
#' @param adducts `data.frame`,
#' with 3 columns, namely: mass, name, to, see details section.
#' @param customModifications `data.frame`,
#' with 4 columns, namely: mass, name, location, variable, see details section.
#' @param neutralLoss `list`,
#' neutral loss that should be applied, see
#' [`MSnbase::calculateFragments()`][MSnbase::calculateFragments-methods] and
Expand Down Expand Up @@ -112,6 +131,7 @@ readTopDownFiles <- function(path, pattern=".*",
type=c("a", "b", "c", "x", "y", "z"),
modifications=c("Carbamidomethyl",
"Acetyl", "Met-loss"),
customModifications=data.frame(),
adducts=data.frame(),
neutralLoss=MSnbase::defaultNeutralLoss(),
sequenceOrder=c("original", "random", "inverse"),
Expand Down Expand Up @@ -140,8 +160,9 @@ readTopDownFiles <- function(path, pattern=".*",
sequence=sequence,
type=type,
modifications=modifications,
neutralLoss=neutralLoss,
customModifications=customModifications,
adducts=adducts,
neutralLoss=neutralLoss,
sequenceOrder=sequenceOrder,
verbose=verbose
)
Expand Down Expand Up @@ -294,15 +315,17 @@ readTopDownFiles <- function(path, pattern=".*",
#' Create NCB Map (N-/C-terminal, or Bidirectional)
#'
#' @param object `TopDownSet`
#' @param nterm `character(1)`, regular expression to match N-term
#' @param cterm `character(1)`, regular expression to match C-term
#' @return `Matrix`, Nterm == 1, Cterm == 2, bidirectional == 3
#' @noRd
.ncbMap <- function(object, nterm=c("a", "b", "c"), cterm=c("x", "y", "z")) {
.ncbMap <- function(object, nterm="^a|^b|^c", cterm="^x|^y|^z") {
.isTopDownSet(object)

w <- width(object@rowViews)
mn <- mc <- object@assay
selN <- fragmentType(object) %in% nterm
selC <- fragmentType(object) %in% cterm
selN <- grepl(nterm, fragmentType(object))
selC <- grepl(cterm, fragmentType(object))
mn[!selN, ] <- 0L
mn <- drop0(mn)
mc[!selC, ] <- 0L
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test_TopDownSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ test_that(".ncbMap", {
dimnames=list(paste0("bond", 1:3), c()))

expect_equal(.ncbMap(tds), r)
expect_equal(.ncbMap(tds1), r1)
expect_equal(.ncbMap(tds1, cterm = "^x$|^y$|^z$"), r1)
})

test_that("normalize", {
Expand Down
52 changes: 52 additions & 0 deletions tests/testthat/test_fragments.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,35 @@ test_that(".addAdducts", {
expect_equal(.addAdducts(d, a), rbind(d, r))
})

test_that(".addCustomModifications", {
## fragment table for a sequence of length 3: rows 1-3 are c fragments
## (pos 1-3), rows 4-5 are x fragments (pos 1-2), row 6 is a y fragment (pos 1)
d <- data.frame(mz=1:6, ion=c("c1", "c2", "c3", "x1", "x2", "y1"),
type=rep(c("c", "x", "y"), 3:1), z=1,
pos=c(1:3, 1:2, 1),
seq=c("A", "AC", "ACE", "E", "CE", "E"),
stringsAsFactors=FALSE)
## fixed N-terminal modification: expected to change the c fragments only
m <- data.frame(mass=42, name="Acetyl", location="N-term", variable=FALSE)
r <- d
r$mz[1:3] <- r$mz[1:3] + 42
r$type[1:3] <- paste0(r$type[1:3], "_mAcetyl")
r$ion[1:3] <- paste0(r$ion[1:3], "_mAcetyl")
## `d` has rows but none of the required modification columns
expect_error(.addCustomModifications(d, 3, d),
"'mass', 'name', 'location' and 'variable'")
## a modification table without rows returns the input unchanged
expect_equal(.addCustomModifications(d, 3, data.frame()), d)
expect_equal(.addCustomModifications(d, 3, m), r)
## variable modification: the modified c fragments are appended to `d`
m$variable <- TRUE
expect_equal(.addCustomModifications(d, 3, m), rbind(d, r[1:3,]))
## second, fixed modification at position 2, applied after the first row;
## it is expected to hit rows 2, 3, 5 and the appended rows 8, 9
m <- rbind(m, data.frame(mass=2, name="Foo", location=2, variable=FALSE))
r <- rbind(d, r[1:3,])
i <- c(2, 3, 5, 8, 9)
r$mz[i] <- r$mz[i] + 2
r$type[i] <- paste0(r$type[i], "_mFoo")
r$ion[i] <- paste0(r$ion[i], "_mFoo")
expect_equal(.addCustomModifications(d, 3, m), r)
## reversed row order: "Foo" is applied first, so the suffixes of the
## appended rows are swapped
r$type[7:9] <- c("c_mAcetyl", "c_mFoo_mAcetyl", "c_mFoo_mAcetyl")
r$ion[7:9] <- c("c1_mAcetyl", "c2_mFoo_mAcetyl", "c3_mFoo_mAcetyl")
expect_equal(.addCustomModifications(d, 3, m[2:1,]), r)
})

test_that(".matchFragments", {
expect_equal(.matchFragments(mz=integer(), fmass=1:3), integer())
expect_equal(.matchFragments(mz=1:3, fmass=integer()),
Expand Down Expand Up @@ -116,3 +145,26 @@ test_that(".unimod765", {
expect_equal(.unimod765(c("MACE", "MWE", "EAC")),
c("ACE", "MWE", "EAC"))
})

test_that(".cusmod", {
    ## fragment table for a sequence of length 3: rows 1-3 are c fragments,
    ## rows 4-6 are x/y fragments
    d <- data.frame(mz=1:6, ion=c("c1", "c2", "c3", "x1", "x2", "y1"),
                    type=rep(c("c", "x", "y"), 3:1), z=1,
                    pos=c(1:3, 1:2, 1),
                    seq=c("A", "AC", "ACE", "E", "CE", "E"),
                    stringsAsFactors=FALSE)

    ## location 4 == n + 1 ("C-term"): only the x/y fragments are modified
    cterm <- 4:6
    r <- d
    r$mz[cterm] <- d$mz[cterm] + 0.5
    r$type[cterm] <- paste0(d$type[cterm], "_mN")
    r$ion[cterm] <- paste0(d$ion[cterm], "_mN")
    expect_equal(
        .cusmod(d, 3, id = "N", deltamz = 0.5, location = 4, variable = FALSE),
        r
    )

    ## variable modification: the modified rows are appended to the original
    rvar <- rbind(d, r[cterm, ], make.row.names = FALSE)
    expect_equal(
        .cusmod(d, 3, id = "N", deltamz = 0.5, location = 4, variable = TRUE),
        rvar
    )
})

0 comments on commit 69fb485

Please sign in to comment.