kant
diff --git a/‎.DS_Store
6 KB b/‎.DS_Store
6 KB
diff --git a/‎DESCRIPTION
Lines changed: 25 additions & 0 deletions b/‎DESCRIPTION
Lines changed: 25 additions & 0 deletions
diff --git a/‎Files/.DS_Store
6 KB b/‎Files/.DS_Store
6 KB
diff --git a/‎Files/Introduction to MethICA.docx
14.4 KB b/‎Files/Introduction to MethICA.docx
14.4 KB
diff --git a/‎NAMESPACE
Lines changed: 2 additions & 0 deletions b/‎NAMESPACE
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/.DS_Store
6 KB b/‎R/.DS_Store
6 KB
diff --git a/‎R/MC_extract.R
Lines changed: 496 additions & 0 deletions b/‎R/MC_extract.R
Lines changed: 496 additions & 0 deletions
diff --git a/‎R/feature_table.R
Lines changed: 157 additions & 0 deletions b/‎R/feature_table.R
Lines changed: 157 additions & 0 deletions
diff --git a/‎R/utils.R
Lines changed: 118 additions & 0 deletions b/‎R/utils.R
Lines changed: 118 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 56 additions & 0 deletions b/‎README.md
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,25 @@
+Package: lea
+Title: Methylation signature analysis with ICA project R package
+Version: 0.0
+Authors@R: 
+    person(given = "Lea",
+           family = "Meunier",
+           role = c("aut", "cre"),
+           email = "[email protected]")
+Description: Uses independent component analysis (ICA) on methylation data
+    to extract epigenetic signatures, and provides statistical analyses and
+    graphical representations to help interpret them.
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.1.0
+Imports: 
+    stringr,
+    fastICA,
+    gridExtra,
+    cowplot,
+    ggplot2,
+    RColorBrewer,
+    plotrix,
+    broom
@@ -0,0 +1,2 @@
+# Generated by roxygen2: do not edit by hand
+
@@ -0,0 +1,157 @@
+# Rd
+# description >> Annotate a table of CpG sites with chromatin features (chromatin state, CpG context, CpG island, gene and replication timing) read from segment files and, optionally, with the nucleotide context and the number of flanking CpGs computed from the probe sequence
+# argument
+# item >> CpG_table >> Table of CpG sites with columns c("TargetID", "MAPINFO", "CHR"), and "FORWARD_SEQUENCE" if add_seq_info = TRUE
+# item >> file_CpG_context >> Path of the file containing the CpG context (HMD, PMD, LMR, UMR), in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the column to add. For .txt/.bed files the prefix "chr" is prepended to the chr column; added to CpG_table as "CpG_context"
+# item >> name_col_CpG_context >> Names of columns in file_CpG_context that should be used to annotate CpG_table
+# item >> file_chrom_state >> Path of the file containing chromatin states, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the columns to add; added to CpG_table as c("state", "domain")
+# item >> name_col_chrom_state >> Names of columns in file_chrom_state that should be used to annotate CpG_table
+# item >> file_CGI >> Path of the file containing CpG island features, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the column to add; added to CpG_table as "cgi_feature"
+# item >> name_col_CGI >> Names of columns in file_CGI that should be used to annotate CpG_table
+# item >> file_genes >> Path of the file containing gene features, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the columns to add; added to CpG_table as c("gene_name", "gene_feature")
+# item >> name_col_genes >> Names of columns in file_genes that should be used to annotate CpG_table
+# item >> file_replication >> Path of the file containing replication timing, in .txt, .bed or Rdata format, with columns c("chr", "start", "end") and the column to add; added to CpG_table as "decile"
+# item >> name_col_replication >> Names of columns in file_replication that should be used to annotate CpG_table
+# item >> add_seq_info >> If TRUE, add the nucleotide context ("context") and the number of adjacent CpG ("nb_flanking_CpG") to CpG_table
+# item >> save >> If TRUE, write the annotated table to output.directory as CpG_feature.Rdata and CpG_feature.txt
+# item >> output.directory >> Directory receiving the saved files (required when save = TRUE, created if it does not exist)
+# value >> CpG_table annotated with chromatin features
+# author >> Léa Meunier
+# keyword >> CpG annotation
+#` @import stringr
+# end
+
+
+# Read a segment annotation file into a data frame.
+# .txt and .bed files are read as tab-delimited tables with a header line,
+# .Rdata files through load.RData(); any other extension is an error.
+# add_chr_prefix prepends "chr" to the chr column of .txt/.bed files only.
+.read_segment_file <- function(path, add_chr_prefix = FALSE) {
+  extension <- tolower(sub("^.*\\.", "", basename(path)))
+  if (extension %in% c("txt", "bed")) {
+    segments <- read.table(path, sep = "\t", header = TRUE)
+    if (add_chr_prefix) {
+      segments$chr <- paste0("chr", segments$chr)
+    }
+  } else if (extension == "rdata") {
+    segments <- load.RData(path)
+  } else {
+    stop("unsupported extension for file '", path,
+         "': expected .txt, .bed or .Rdata", call. = FALSE)
+  }
+  segments
+}
+
+# Annotate CpG_table (columns CHR / MAPINFO) with the segments stored in path
+# (columns chr / start / end).
+.annotate_from_file <- function(CpG_table, path, cols_to_add,
+                                names_cols_to_add, add_chr_prefix = FALSE) {
+  segments <- .read_segment_file(path, add_chr_prefix = add_chr_prefix)
+  table.PosXSegm(
+    table_Pos = CpG_table, table_Pos.chrom.col = "CHR",
+    table_Pos.pos.col = "MAPINFO",
+    table_Segm = segments, table_Segm.chrom.col = "chr",
+    table_Segm.start.col = "start", table_Segm.end.col = "end",
+    cols_to_add = cols_to_add, names_cols_to_add = names_cols_to_add
+  )
+}
+
+# Add the columns "context" (SCGS / SCGW / WCGW) and "nb_flanking_CpG",
+# both derived from the FORWARD_SEQUENCE column.
+.add_sequence_info <- function(CpG_table) {
+  if (!"FORWARD_SEQUENCE" %in% colnames(CpG_table)) {
+    stop("column FORWARD_SEQUENCE not found in CpG_table ",
+         "(required when add_seq_info = TRUE)", call. = FALSE)
+  }
+  forward_seq <- as.character(CpG_table$FORWARD_SEQUENCE)
+  if (length(forward_seq) == 0) {
+    CpG_table$context <- character(0)
+    CpG_table$nb_flanking_CpG <- numeric(0)
+    return(CpG_table)
+  }
+
+  # Split each sequence around the target "[CG]"; n = 2 guarantees exactly
+  # two pieces (upstream, downstream) whatever the sequence content.
+  flanks <- stringr::str_split(forward_seq, "\\[CG\\]", n = 2, simplify = TRUE)
+  pre_base <- stringr::str_sub(flanks[, 1], -1, -1)
+  post_base <- stringr::str_sub(flanks[, 2], 1, 1)
+
+  # S = strong base (C or G), W = weak base (A or T). One strong and one weak
+  # neighbour gives "SCGW" whichever side the strong base is on; anything
+  # else (two weak neighbours, or an unrecognised base) gives "WCGW".
+  pre_strong <- pre_base %in% c("C", "G")
+  post_strong <- post_base %in% c("C", "G")
+  pre_weak <- pre_base %in% c("A", "T")
+  post_weak <- post_base %in% c("A", "T")
+  context <- rep("WCGW", length(forward_seq))
+  context[(pre_strong & post_weak) | (pre_weak & post_strong)] <- "SCGW"
+  context[pre_strong & post_strong] <- "SCGS"
+  context[is.na(forward_seq)] <- NA
+  CpG_table$context <- context
+
+  # Count "CG" in the sequence trimmed of its first and last 24 characters.
+  # The target "[CG]" itself matches once, hence the "- 1"; a trimmed
+  # sequence without any match counts as 0.
+  trimmed_seq <- stringr::str_sub(forward_seq, 25, -25)
+  CpG_table$nb_flanking_CpG <- pmax(stringr::str_count(trimmed_seq, "CG") - 1, 0)
+
+  CpG_table
+}
+
+chromatin.feature <- function(CpG_table = CpG_table, file_CpG_context = NULL, name_col_CpG_context = NULL,
+                              file_chrom_state = NULL, name_col_chrom_state = NULL,
+                              file_CGI = NULL, name_col_CGI = NULL,
+                              file_genes = NULL, name_col_genes = NULL,
+                              file_replication = NULL, name_col_replication = NULL,
+                              add_seq_info = TRUE, save = FALSE, output.directory) {
+
+  CpG_table <- data.frame(CpG_table)
+
+  # Error if CpG_table does not contain the minimal columns
+  missing_col <- setdiff(c("TargetID", "MAPINFO", "CHR"), colnames(CpG_table))
+  if (length(missing_col) >= 1) {
+    stop("column(s) ", paste(missing_col, collapse = ", "),
+         " not found in CpG_table", call. = FALSE)
+  }
+
+  # Chromatin state
+  if (!is.null(file_chrom_state)) {
+    CpG_table <- .annotate_from_file(CpG_table, file_chrom_state,
+                                     name_col_chrom_state, c("state", "domain"))
+  }
+
+  # CpG context (the chr column of .txt/.bed files gets a "chr" prefix)
+  if (!is.null(file_CpG_context)) {
+    CpG_table <- .annotate_from_file(CpG_table, file_CpG_context,
+                                     name_col_CpG_context, "CpG_context",
+                                     add_chr_prefix = TRUE)
+  }
+
+  # CGI feature
+  if (!is.null(file_CGI)) {
+    CpG_table <- .annotate_from_file(CpG_table, file_CGI,
+                                     name_col_CGI, "cgi_feature")
+  }
+
+  # Genes feature
+  if (!is.null(file_genes)) {
+    CpG_table <- .annotate_from_file(CpG_table, file_genes,
+                                     name_col_genes, c("gene_name", "gene_feature"))
+  }
+
+  # Replication timing (guarded by its own file argument, not by file_CGI)
+  if (!is.null(file_replication)) {
+    CpG_table <- .annotate_from_file(CpG_table, file_replication,
+                                     name_col_replication, "decile")
+  }
+
+  # Nucleotide context and number of CpG in the adjacent sequence
+  if (isTRUE(add_seq_info)) {
+    CpG_table <- .add_sequence_info(CpG_table)
+  }
+
+  if (isTRUE(save)) {
+    if (missing(output.directory)) {
+      stop("output.directory must be provided when save = TRUE", call. = FALSE)
+    }
+    # Drop trailing separators (but keep a lone root) so that file.path()
+    # builds clean paths with or without a trailing slash in the argument.
+    out_dir <- sub("(?<=.)[/\\\\]+$", "", output.directory, perl = TRUE)
+    if (!dir.exists(out_dir)) {
+      dir.create(out_dir, recursive = TRUE)
+    }
+    CpG_feature <- CpG_table
+    # base:: makes explicit that the function is meant, not the `save` flag
+    base::save(CpG_feature, file = file.path(out_dir, "CpG_feature.Rdata"))
+    write.table(CpG_feature, file = file.path(out_dir, "CpG_feature.txt"))
+  }
+
+  return(CpG_table)
+}
@@ -0,0 +1,118 @@
+# Rd
+# description >> Load an Rdata file and return the object it contains instead of attaching it to the calling environment
+# argument
+# item >> file.name >> path of the file to load
+# value >> the object contained in the Rdata file (the first one, with a warning, if the file holds several)
+# author >> Lea Meunier
+# keyword >> Rdata
+# end
+
+load.RData <- function(file.name) {
+  # Load into a dedicated environment: objects stored in the file can then
+  # never collide with (or be overwritten by) this function's own variables.
+  sandbox <- new.env(parent = emptyenv())
+  object.names <- load(file.name, envir = sandbox, verbose = TRUE)
+  if (length(object.names) == 0) {
+    stop("no object found in '", file.name, "'", call. = FALSE)
+  }
+  if (length(object.names) > 1) {
+    warning("'", file.name, "' contains ", length(object.names),
+            " objects; returning the first one ('", object.names[1], "')",
+            call. = FALSE)
+  }
+  get(object.names[1], envir = sandbox, inherits = FALSE)
+}
+
+# Rd
+# description >> Remove the factors of a table, data.frame or matrix: each factor column becomes numeric when all its non-missing values can be read as numbers, character otherwise
+# argument
+# item >> d >> table where you want to remove the factors
+# value >> table without factor
+# author >> Lea Meunier
+# keyword >> table
+# end
+
+factoall <- function(d) {
+  # seq_len() so that a table without columns is returned untouched
+  for (i in seq_len(ncol(d))) {
+    if (is.factor(d[, i])) {
+      as_text <- as.character(d[, i])
+      as_number <- suppressWarnings(as.numeric(as_text))
+      # Keep the numeric version only when the conversion created no new NA,
+      # i.e. when every non-missing label is a valid number.
+      if (sum(is.na(as_number)) == sum(is.na(as_text))) {
+        d[, i] <- as_number
+      } else {
+        d[, i] <- as_text
+      }
+    }
+  }
+  d
+}
+
+
+
+# Rd
+# description >> Annotate positions in a data frame (table_Pos) using the segments of another data frame (table_Segm): each position receives the values of the segments that contain it (start <= position <= end, same chromosome), comma-separated when several segments overlap, NA when none does
+# argument
+# item >> table_Pos >> Data frame with positions to annotate
+# item >> table_Pos.chrom.col >> Chromosome column in table_Pos
+# item >> table_Pos.pos.col >> Position column in table_Pos
+# item >> table_Segm >> Data frame with segments to use for annotating
+# item >> table_Segm.chrom.col >> Chromosome column in table_Segm
+# item >> table_Segm.start.col >> Segment start column in table_Segm
+# item >> table_Segm.end.col >> Segment end column in table_Segm
+# item >> cols_to_add >> Names of columns in table_Segm that should be used to annotate table_Pos
+# item >> names_cols_to_add >> Column names to give to the columns added to table_Pos (defaults to cols_to_add when NULL); names in excess of cols_to_add create columns filled with NA
+# value >> table_Pos annotated with table_Segm information
+# author >> Lea Meunier
+# keyword >> annotation
+# end
+
+table.PosXSegm <- function (table_Pos = NULL, table_Pos.chrom.col = "chrom", table_Pos.pos.col = "pos", 
+    table_Segm = NULL, table_Segm.chrom.col = "chrom", table_Segm.start.col = "start", 
+    table_Segm.end.col = "end", cols_to_add = NULL, names_cols_to_add = NULL) 
+{
+    if (is.null(names_cols_to_add)) {
+        names_cols_to_add <- cols_to_add
+    }
+    if (length(names_cols_to_add) < length(cols_to_add)) {
+        stop("names_cols_to_add must provide a name for each column of cols_to_add", 
+            call. = FALSE)
+    }
+
+    # Create every output column up front so that chromosomes without any
+    # segment still carry the columns (filled with NA).
+    for (col in names_cols_to_add) {
+        table_Pos[, col] <- NA
+    }
+    segm_by_chrom <- split(table_Segm, table_Segm[, table_Segm.chrom.col])
+    pos_by_chrom <- split(table_Pos, table_Pos[, table_Pos.chrom.col])
+
+    for (chr in intersect(names(segm_by_chrom), names(pos_by_chrom))) {
+        if (nrow(pos_by_chrom[[chr]]) == 0) {
+            next
+        }
+        segm_chr <- segm_by_chrom[[chr]]
+        seg_start <- segm_chr[, table_Segm.start.col]
+        seg_end <- segm_chr[, table_Segm.end.col]
+
+        # lapply (not sapply): the result must stay a list with one element
+        # per position even when all positions overlap the same number of
+        # segments, a case sapply would simplify into a vector or matrix.
+        hits <- lapply(pos_by_chrom[[chr]][, table_Pos.pos.col], function(pos) {
+            which(seg_start <= pos & seg_end >= pos)
+        })
+        no_hit <- lengths(hits) == 0
+
+        for (j in seq_along(cols_to_add)) {
+            seg_values <- segm_chr[, cols_to_add[j]]
+            annotation <- vapply(hits, function(z) paste(seg_values[z], collapse = ","), 
+                character(1))
+            annotation[no_hit] <- NA
+            pos_by_chrom[[chr]][, names_cols_to_add[j]] <- annotation
+        }
+    }
+
+    # Put the per-chromosome pieces back in the original row order
+    table_Pos <- unsplit(pos_by_chrom, table_Pos[, table_Pos.chrom.col])
+    return(table_Pos)
+}
+
+
+# Rd
+# description >> Compute, for every category of a CpG feature, its enrichment in a selection of CpGs: frequency of the category in the selection divided by its frequency in the whole table. Comma-separated values are split and counted individually
+# argument
+# item >> CpG_select >> CpG selection, used as row index of CpG_feature
+# item >> column >> feature (column of CpG_feature) to compute enrichment
+# item >> CpG_feature >> table with CpG feature
+# value >> table with enrichment results, named by category (NA for a category absent from the selection)
+# author >> Lea Meunier
+# keyword >> test
+#` @import stringr
+# end
+
+enrich.test <- function (CpG_select, column, CpG_feature) 
+{
+    # Count each category after splitting the comma-separated annotations
+    count_categories <- function(values) {
+        table(unlist(stringr::str_split(values, ",")))
+    }
+
+    background <- count_categories(CpG_feature[, column])
+    categories <- names(background)
+
+    # Re-index the selection counts on the background categories
+    selected <- count_categories(CpG_feature[CpG_select, column])[categories]
+
+    selected_freq <- selected / sum(selected, na.rm = TRUE)
+    background_freq <- background / sum(background, na.rm = TRUE)
+
+    ratio <- selected_freq / background_freq
+    names(ratio) <- categories
+    ratio
+}
+
+
+
+
+
+
+
@@ -0,0 +1,56 @@
+Abstract
+
+Installation
+========
+Install from the GitHub repository using devtools:
+
+    install.packages("devtools")
+    library(devtools)
+    devtools::install_github("FunGeST/MethICA")
+
+Input files
+========
+To extract methylation signatures, _MethICA_ requires one mandatory input file -- a **methylation level** file (bval) giving the methylation level of each CpG (rows) in each sample of the series (columns). For the representations that help interpret the methylation signatures, two further files are required -- a **CpG annotation** file with information on the CpGs of interest, and a minimal **sample annotation** file.
+
+**The input files should have the following format. Example input files are provided with the package.**
+
+`1]. bval `: __methylation level data__
+
+* rownames : CpG id
+* colnames : Samples id
+
+**Optional: (the header is required, but the order of the columns can change)**
+
+`2]. CpG annotation`:
+
+* `CpG`: Sample CpG. Any alphanumeric string.
+* `CHROM`: Chromosome. One of chr1 to chr22, chrX or chrY ('chr' prefix required).
+
+`3]. annot_data`: __sample annotation data__
+
+* `Sample`: Sample identifier. Any alphanumeric string.
+
+Running MethICA
+================
+
+* The [RUNNING_METHICA_EXAMPLE](https://github.com/FunGeST/) folder contains an example dataset and an R script of a typical MethICA analysis using this data. Please try!</br>
+* [*Introduction to MethICA*] provides a comprehensive example of the MethICA workflow with detailed  explanations of each function.</br> 
+
+
+License
+========
+
+Copyright (C) 2020 Léa Meunier
+
+MethICA is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# Generated by roxygen2: do not edit by hand`
	`2`	`+`