Skip to content

Commit 7dc4ae7

Browse files
committed
import package
import folder with package
0 parents  commit 7dc4ae7

30 files changed

+416622
-0
lines changed

.DS_Store

6 KB
Binary file not shown.

DESCRIPTION

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Package: lea
2+
Title: Methylation signature analysis with ICA project R package
3+
Version: 0.0
4+
Authors@R:
5+
person(given = "Lea",
6+
family = "Meunier",
7+
role = c("aut", "cre"),
8+
email = "[email protected]")
9+
Description: use independent components analysis (ICA) on methylation data
10+
to extract epigenetics signature and provide statistical analysis and
11+
representation to help interpretation
12+
License: GPL-3
13+
Encoding: UTF-8
14+
LazyData: true
15+
Roxygen: list(markdown = TRUE)
16+
RoxygenNote: 7.1.0
17+
Imports:
18+
stringr,
19+
fastICA,
20+
gridExtra,
21+
cowplot,
22+
ggplot2,
23+
RColorBrewer,
24+
plotrix,
25+
broom

Files/.DS_Store

6 KB
Binary file not shown.

Files/Introduction to MethICA.docx

14.4 KB
Binary file not shown.

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Generated by roxygen2: do not edit by hand
2+

R/.DS_Store

6 KB
Binary file not shown.

R/MC_extract.R

Lines changed: 496 additions & 0 deletions
Large diffs are not rendered by default.

R/feature_table.R

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Rd
2+
# description >> annotate a table of CpG sites with chromatin and genomic features
3+
# argument
4+
# item >> CpG_table >> with c("TargetID", "MAPINFO", "CHR") and "FORWARD_SEQUENCE" if add_seq_info = TRUE
5+
# item >> file_CpG_context >> Path of the file containing CpG context (HMD, PMD, LMR, UMR) file in .txt, .bed or Rdata with CpG feature to add to the table, with column c("chr", "start", "end") and the column to add
6+
# item >> name_col_CpG_context >> Names of columns in file_CpG_context that should be used to annotate CpG_table
7+
# item >> file_chrom_state >> Path of the file containing chromatin states file in .txt, .bed or Rdata with CpG feature to add to the table, with column c("chr", "start", "end") and the column to add
8+
# item >> name_col_chrom_state >> Names of columns in file_chrom_state that should be used to annotate CpG_table
9+
# item >> file_CGI >> Path of the file containing CpG island (CGI) features, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the column(s) to add
10+
# item >> name_col_CGI >> Names of columns in file_CGI that should be used to annotate CpG_table
11+
# item >> file_genes >> Path of the file containing gene features, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the column(s) to add
12+
# item >> name_col_genes >> Names of columns in file_genes that should be used to annotate CpG_table
13+
# item >> file_replication >> Path of the file containing replication timing, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the column(s) to add
14+
# item >> name_col_replication >> Names of columns in file_replication that should be used to annotate CpG_table
15+
# item >> add_seq_info >> If = TRUE, add the number of adjacent CpG and the nucleotide context to the CpG_table
16+
# value >> return CpG_table annotated with chromatin features
17+
# author >> Léa Meunier
18+
# keyword >> CpG annotation
19+
#` @import stringr
20+
# end
21+
22+
23+
chromatin.feature <- function(CpG_table = CpG_table, file_CpG_context = NULL, name_col_CpG_context = NULL, file_chrom_state = NULL, name_col_chrom_state = NULL, file_CGI = NULL, name_col_CGI = NULL, file_genes = NULL, name_col_genes = NULL, file_replication = NULL, name_col_replication = NULL, add_seq_info = TRUE, save = FALSE, output.directory){
  # Annotate a table of CpG sites with the features found in optional segment
  # files (chromatin state, CpG context, CGI, genes, replication timing) and,
  # optionally, with sequence-derived information.
  #
  # Arguments:
  #   CpG_table          table with columns "TargetID", "MAPINFO", "CHR"
  #                      (plus "FORWARD_SEQUENCE" when add_seq_info is TRUE).
  #   file_*             path of a tab-separated .txt/.bed file (with header)
  #                      or an .Rdata file holding a table with columns
  #                      "chr", "start", "end" and the columns to add;
  #                      NULL skips that annotation.
  #   name_col_*         columns of the matching file to copy into CpG_table.
  #   add_seq_info       if TRUE, add `context` (SCGS / SCGW / WCGW) and
  #                      `nb_flanking_CpG` computed from FORWARD_SEQUENCE.
  #   save               if TRUE, write the result as CpG_feature.Rdata and
  #                      CpG_feature.txt inside output.directory.
  #   output.directory   directory used when save is TRUE (created if needed).
  #
  # Value: CpG_table (as a data.frame) with the annotation columns added.

  # The declared default `CpG_table = CpG_table` refers to itself and cannot be
  # evaluated, so report the missing argument explicitly.
  if (missing(CpG_table)) {
    stop("CpG_table is required", call. = FALSE)
  }
  CpG_table <- data.frame(CpG_table)

  add_seq <- isTRUE(as.logical(add_seq_info))
  want_save <- isTRUE(as.logical(save))

  # Error if CpG_table doesn't contain the minimal set of columns
  required_col <- c("TargetID", "MAPINFO", "CHR")
  if (add_seq) {
    required_col <- c(required_col, "FORWARD_SEQUENCE")
  }
  missing_col <- setdiff(required_col, colnames(CpG_table))
  if (length(missing_col) >= 1) {
    stop("column(s) ", paste(missing_col, collapse = ", "),
         " not found in CpG_table", call. = FALSE)
  }

  # Read one segment file; the format is chosen from the (case-insensitive)
  # text after the last dot of the file name.
  read_segments <- function(path, add_chr_prefix = FALSE) {
    extension <- tolower(sub("^.*\\.", "", basename(path)))
    if (extension %in% c("txt", "bed")) {
      segments <- read.table(path, sep = "\t", header = TRUE)
      if (add_chr_prefix) {
        segments$chr <- paste0("chr", segments$chr)
      }
    } else if (extension == "rdata") {
      segments <- load.RData(path)
    } else {
      stop("unsupported file type for '", path,
           "': expected .txt, .bed or .Rdata", call. = FALSE)
    }
    segments
  }

  # Add the requested columns of one segment file to the table (no-op when
  # the file path is NULL).
  annotate <- function(tab, path, cols, new_names, add_chr_prefix = FALSE) {
    if (is.null(path)) {
      return(tab)
    }
    table.PosXSegm(table_Pos = tab, table_Pos.chrom.col = "CHR",
                   table_Pos.pos.col = "MAPINFO",
                   table_Segm = read_segments(path, add_chr_prefix),
                   table_Segm.chrom.col = "chr",
                   table_Segm.start.col = "start",
                   table_Segm.end.col = "end",
                   cols_to_add = cols, names_cols_to_add = new_names)
  }

  # Chromatin state
  CpG_table <- annotate(CpG_table, file_chrom_state, name_col_chrom_state,
                        c("state", "domain"))
  # CpG context (text files get a "chr" prefix added to their chromosome names)
  CpG_table <- annotate(CpG_table, file_CpG_context, name_col_CpG_context,
                        "CpG_context", add_chr_prefix = TRUE)
  # CGI feature
  CpG_table <- annotate(CpG_table, file_CGI, name_col_CGI, "cgi_feature")
  # Genes feature
  CpG_table <- annotate(CpG_table, file_genes, name_col_genes,
                        c("gene_name", "gene_feature"))
  # Replication timing (guarded by its own file argument, not file_CGI)
  CpG_table <- annotate(CpG_table, file_replication, name_col_replication,
                        "decile")

  # Add nucleotide context and number of CpG in the adjacent sequence
  if (add_seq) {
    forward_seq <- as.character(CpG_table$FORWARD_SEQUENCE)

    # n = 2 guarantees exactly two columns (before / after the "[CG]" marker)
    flanks <- stringr::str_split(forward_seq, "\\[CG\\]", n = 2, simplify = TRUE)
    pre_base <- stringr::str_sub(flanks[, 1], -1, -1)
    post_base <- stringr::str_sub(flanks[, 2], 1, 1)

    pre_strong <- pre_base %in% c("C", "G")
    pre_weak <- pre_base %in% c("A", "T")
    post_strong <- post_base %in% c("C", "G")
    post_weak <- post_base %in% c("A", "T")

    # Same classification as the per-row if/else chain: anything that is not
    # S-S or a mixed S/W pair falls into "WCGW".
    context <- rep("WCGW", length(forward_seq))
    context[(pre_strong & post_weak) | (pre_weak & post_strong)] <- "SCGW"
    context[pre_strong & post_strong] <- "SCGS"
    # A sequence that is missing or has no "[CG]" marker has no defined context
    context[is.na(forward_seq) | !grepl("[CG]", forward_seq, fixed = TRUE)] <- NA
    CpG_table$context <- context

    # Count "CG" in the sequence trimmed by 24 characters on each side; one is
    # subtracted from the match count, as in the original per-row computation.
    core_seq <- stringr::str_sub(forward_seq, 25, -25)
    CpG_table$nb_flanking_CpG <- lengths(gregexpr("CG", core_seq)) - 1
  }

  if (want_save) {
    if (missing(output.directory)) {
      stop("output.directory is required when save = TRUE", call. = FALSE)
    }
    if (!dir.exists(output.directory)) {
      dir.create(output.directory, recursive = TRUE)
    }
    # The saved object keeps the name `CpG_feature`
    CpG_feature <- CpG_table
    # file.path() puts the files inside the directory whether or not the
    # caller supplied a trailing path separator.
    base::save(CpG_feature, file = file.path(output.directory, "CpG_feature.Rdata"))
    write.table(CpG_feature, file = file.path(output.directory, "CpG_feature.txt"))
  }

  return(CpG_table)
}

R/utils.R

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Rd
2+
# description >> loads a Rdata file into an object
3+
# argument
4+
# item >> file.name >> path of the file to load
5+
# value >> object contained in the Rdata file
6+
# author >> Lea Meunier
7+
# keyword >> Rdata
8+
# end
9+
10+
load.RData <- function(file.name) {
  # Load the single object stored in an .RData file and return it.
  #
  # file.name: path of the file to load.
  # Returns the stored object; stops if the file does not hold exactly one.

  # Load into a private environment so that objects stored in the file can
  # never overwrite, or be shadowed by, this function's own local variables.
  load_env <- new.env(parent = emptyenv())
  object.names <- load(file.name, envir = load_env, verbose = TRUE)

  if (length(object.names) != 1) {
    stop("load.RData expects exactly one object in '", file.name,
         "' but found ", length(object.names), call. = FALSE)
  }
  return(get(object.names, envir = load_env, inherits = FALSE))
}
14+
15+
# Rd
16+
# description >> Remove factor of a table, data.frame or matrix
17+
# argument
18+
# item >> d >> table where you want to remove the factors
19+
# value >> table without factor
20+
# author >> Lea Meunier
21+
# keyword >> table
22+
# end
23+
24+
factoall <- function (d)
{
  # Replace every factor column of `d` by a plain vector: numeric when all
  # non-missing labels parse as numbers, character otherwise. Non-factor
  # columns are left untouched. Returns the modified table.

  # seq_len() gives an empty loop for a zero-column table, whereas 1:0 would
  # visit the non-existent columns 1 and 0.
  for (i in seq_len(ncol(d))) {
    if (is.factor(d[, i])) {
      as_text <- as.character(d[, i])
      as_number <- suppressWarnings(as.numeric(as_text))
      # Keep the numeric version only if the conversion created no new NA,
      # i.e. every non-missing label was a valid number.
      if (sum(is.na(as_number)) == sum(is.na(as_text))) {
        d[, i] <- as_number
      } else {
        d[, i] <- as_text
      }
    }
  }
  d
}
40+
41+
42+
43+
# Rd
44+
# description >> Function to annotate positions in a data frame (table_Pos) using segments in another data frame (table_Segm)
45+
# argument
46+
# item >> table_Pos >> Data frame with positions to annotate
47+
# item >> table_Pos.chrom.col >> Chromosome column in table_Pos
48+
# item >> table_Pos.pos.col >> Position column in table_Pos
49+
# item >> table_Segm >> Data frame with segments to use for annotating
50+
# item >> table_Segm.chrom.col >> Chromosome column in table_Segm
51+
# item >> table_Segm.start.col >> Segment start position column in table_Segm
52+
# item >> table_Segm.end.col >> Segment end position column in table_Segm
53+
# item >> cols_to_add >> Names of columns in table_Segm that should be used to annotate table_Pos
54+
# item >> names_cols_to_add >> Column names to give in the columns added to table_Pos
55+
# value >> table_Pos annotated with table_Segm information
56+
# author >> Lea Meunier
57+
# keyword >> annotation
58+
# end
59+
60+
table.PosXSegm <- function (table_Pos = NULL, table_Pos.chrom.col = "chrom", table_Pos.pos.col = "pos",
                            table_Segm = NULL, table_Segm.chrom.col = "chrom", table_Segm.start.col = "start",
                            table_Segm.end.col = "end", cols_to_add = NULL, names_cols_to_add = NULL)
{
  # Annotate each position of table_Pos with the values of `cols_to_add`
  # taken from every segment of table_Segm that contains it (same chromosome,
  # start <= position <= end). When several segments overlap a position their
  # values are joined with ","; positions without any segment get NA.
  #
  # Returns table_Pos, in its original row order, with the columns named in
  # `names_cols_to_add` added (names_cols_to_add[j] receives cols_to_add[j]).

  # Create the output columns up front so chromosomes absent from table_Segm
  # still carry them (filled with NA).
  for (col in names_cols_to_add) {
    table_Pos[, col] <- NA
  }
  segm_by_chrom <- split(table_Segm, table_Segm[, table_Segm.chrom.col])
  pos_by_chrom <- split(table_Pos, table_Pos[, table_Pos.chrom.col])

  for (chr in intersect(names(segm_by_chrom), names(pos_by_chrom))) {
    segm_chr <- segm_by_chrom[[chr]]
    seg_start <- segm_chr[, table_Segm.start.col]
    seg_end <- segm_chr[, table_Segm.end.col]

    # lapply (not sapply) so the result is always one index vector per
    # position; sapply would collapse it to a matrix whenever every position
    # overlaps the same number (> 1) of segments.
    hits <- lapply(pos_by_chrom[[chr]][, table_Pos.pos.col], function(pos) {
      which(seg_start <= pos & seg_end >= pos)
    })
    no_hit <- lengths(hits) == 0

    # seq_along() makes this a no-op when cols_to_add is NULL or empty
    for (j in seq_along(cols_to_add)) {
      labels <- vapply(hits, function(z) {
        paste(segm_chr[z, cols_to_add[j]], collapse = ",")
      }, character(1))
      labels[no_hit] <- NA
      pos_by_chrom[[chr]][, names_cols_to_add[j]] <- labels
    }
  }

  table_Pos <- unsplit(pos_by_chrom, table_Pos[, table_Pos.chrom.col])
  return(table_Pos)
}
88+
89+
90+
# Rd
91+
# description >> compute the enrichment of CpG feature
92+
# argument
93+
# item >> CpG_select >> CpG selection
94+
# item >> column >> feature to compute enrichment
95+
# item >> CpG_feature >> table with CpG feature
96+
# value >> table with enrichment results
97+
# author >> Lea Meunier
98+
# keyword >> test
99+
#` @import stringr
100+
# end
101+
102+
enrich.test <- function (CpG_select, column, CpG_feature)
{
  # Enrichment of each category of `column` among the selected CpGs relative
  # to the whole table: (share of the category in the selection) divided by
  # (share of the category in CpG_feature). A cell may list several
  # comma-separated categories; each one is counted.

  # Count categories after splitting multi-valued cells on ","
  count_terms <- function(values) {
    table(unlist(stringr::str_split(values, ",")))
  }

  background <- count_terms(CpG_feature[, column])
  categories <- names(background)

  # Index by the background categories so both tables line up; categories
  # absent from the selection come out as NA.
  selected <- count_terms(CpG_feature[CpG_select, column])[categories]

  selected_share <- selected / sum(selected, na.rm = TRUE)
  background_share <- background / sum(background, na.rm = TRUE)

  ratio <- selected_share / background_share
  names(ratio) <- categories
  ratio
}
112+
113+
114+
115+
116+
117+
118+

README.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
Abstract
2+
3+
Installation
4+
========
5+
Install from the GitHub repository using devtools:
6+
7+
install.packages("devtools")
8+
library(devtools)
9+
devtools::install_github("FunGeST/MethICA")
10+
11+
Input files
12+
========
13+
To extract methylation signatures, _MethICA_ requires one mandatory input file -- a **methylation level** file (bval) giving the methylation level of each CpG (rows) in each sample of the series (columns). To produce the representations that help interpret the methylation signatures, two further files are required -- a **CpG annotation** file with information on the CpGs of interest, and a minimal **sample annotation** file.
14+
15+
**The input files should have the following format. Example input files are provided with the package.**
16+
17+
`1]. bval `: __methylation level data__
18+
19+
* rownames : CpG id
20+
* colnames : Samples id
21+
22+
**Optional: (the header is required, but the order of the columns can change)**
23+
24+
`2]. CpG annotation`:
25+
26+
* `CpG`: Sample CpG. Any alphanumeric string.
27+
* `CHROM`: Chromosome. Between chr1 and chr22 or the chrX or chrY ('chr' prefix required).
28+
29+
`3]. annot_data`: __sample annotation data__
30+
31+
* `Sample`: Sample identifier. Any alphanumeric string.
32+
33+
Running MethICA
34+
================
35+
36+
* The [RUNNING_METHICA_EXAMPLE](https://github.com/FunGeST/) folder contains an example dataset and an R script of a typical MethICA analysis using this data. Please try!</br>
37+
* [*Introduction to MethICA*] provides a comprehensive example of the MethICA workflow with detailed explanations of each function.</br>
38+
39+
40+
License
41+
========
42+
43+
Copyright (C) 2020 Léa Meunier
44+
45+
MethICA is free software: you can redistribute it and/or modify
46+
it under the terms of the GNU General Public License as published by
47+
the Free Software Foundation, either version 3 of the License, or
48+
(at your option) any later version.
49+
50+
This program is distributed in the hope that it will be useful,
51+
but WITHOUT ANY WARRANTY; without even the implied warranty of
52+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53+
GNU General Public License for more details.
54+
55+
You should have received a copy of the GNU General Public License
56+
along with this program. If not, see <http://www.gnu.org/licenses/>.

0 commit comments

Comments
 (0)