From b33fa404974774689dd6b02eaa6954a191d8d5f8 Mon Sep 17 00:00:00 2001 From: jovesus Date: Thu, 22 Nov 2018 16:17:06 +0100 Subject: [PATCH 1/6] modified: rgt/tdf/Main.py --- rgt/tdf/Main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rgt/tdf/Main.py b/rgt/tdf/Main.py index 7dc359fa7..dc7f18c76 100644 --- a/rgt/tdf/Main.py +++ b/rgt/tdf/Main.py @@ -210,8 +210,8 @@ def main(): print("merging DBDs...") merge_DBD_regions(path=target) - print("merging DBSs...") - merge_DBSs(path=target) + # print("merging DBSs...") + # merge_DBSs(path=target) print("merging DNA counts...") merge_DNA_counts(path=target) # stat From e4f03587d7bbc7581ebef8c217e927606f46fc4b Mon Sep 17 00:00:00 2001 From: jovesus Date: Thu, 22 Nov 2018 17:31:50 +0100 Subject: [PATCH 2/6] modified: rgt/tdf/Main.py --- rgt/tdf/Main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rgt/tdf/Main.py b/rgt/tdf/Main.py index dc7f18c76..7dc359fa7 100644 --- a/rgt/tdf/Main.py +++ b/rgt/tdf/Main.py @@ -210,8 +210,8 @@ def main(): print("merging DBDs...") merge_DBD_regions(path=target) - # print("merging DBSs...") - # merge_DBSs(path=target) + print("merging DBSs...") + merge_DBSs(path=target) print("merging DNA counts...") merge_DNA_counts(path=target) # stat From c7fdefa864d808d831d30c0b7fe82829f3d7ca9b Mon Sep 17 00:00:00 2001 From: jovesus Date: Mon, 21 Jan 2019 12:23:32 +0100 Subject: [PATCH 3/6] modified: rgt/tdf/Main.py modified: rgt/tdf/RNADNABindingSet.py modified: rgt/tdf/Report.py modified: rgt/tdf/Statistics.py modified: rgt/tdf/Triplexes.py modified: rgt/tdf/triplexTools.py modified: rgt/viz/Main.py modified: rgt/viz/boxplot.py --- rgt/tdf/Main.py | 17 +++--- rgt/tdf/RNADNABindingSet.py | 4 +- rgt/tdf/Report.py | 108 ++++++++++++++++++------------------ rgt/tdf/Statistics.py | 6 +- rgt/tdf/Triplexes.py | 10 ++-- rgt/tdf/triplexTools.py | 9 +-- rgt/viz/Main.py | 2 +- rgt/viz/boxplot.py | 48 ++++++++-------- 8 files changed, 103 insertions(+), 101 deletions(-) diff --git a/rgt/tdf/Main.py b/rgt/tdf/Main.py index 7dc359fa7..215d9ad16 100644 --- a/rgt/tdf/Main.py +++ b/rgt/tdf/Main.py @@ -210,7 +210,7 @@ def main(): print("merging DBDs...") merge_DBD_regions(path=target) - print("merging DBSs...") + print("merging TTSs...") merge_DBSs(path=target) print("merging DNA counts...") merge_DNA_counts(path=target) @@ -331,7 +331,7 @@ def main(): # Triplexes triplexes = Triplexes(organism=args.organism, pars=args) tpx_de = triplexes.search_triplex(target_regions=tdf_input.dna.target_regions, - prefix="target_promoters", remove_temp=True) + prefix="target_promoters", remove_temp=True, summary_file=True) tpx_nde = triplexes.search_triplex(target_regions=tdf_input.dna.nontarget_regions, prefix="nontarget_promoters", remove_temp=True) t1 = time.time() @@ -361,8 +361,8 @@ def main(): no_binding_response(args=args, stat=stat.stat) else: reports = Report(pars=args, input=tdf_input, triplexes=triplexes, stat=stat) - reports.plot_lines(tpx=stat.tpx_def, ylabel="Number of DBSs", - linelabel="No. DBSs", filename=args.rn + "_lineplot.png") + reports.plot_lines(tpx=stat.tpx_def, ylabel="Number of TTSs", + linelabel="No. TTSs", filename=args.rn + "_lineplot.png") reports.barplot(filename=args.rn+"_barplot.png") reports.gen_html_promotertest() reports.gen_html_genes() @@ -408,7 +408,8 @@ def main(): stat = Statistics(pars=args) stat.tpx = triplexes.get_tpx(rna_fasta_file=os.path.join(args.o,"rna_temp.fa"), target_regions=tdf_input.dna.target_regions, - prefix="target_regions", remove_temp=args.rt, dna_fine_posi=False) + prefix="target_regions", remove_temp=args.rt, dna_fine_posi=False, + summary_file = True) stat.tpxf = triplexes.get_tpx(rna_fasta_file=os.path.join(args.o,"rna_temp.fa"), target_regions=tdf_input.dna.target_regions, @@ -451,11 +452,11 @@ def main(): else: reports = Report(pars=args, input=tdf_input, triplexes=triplexes, stat=stat) - reports.plot_lines(tpx=stat.tpx, ylabel="Number of DBSs", - linelabel="No. DBSs", filename=args.rn + "_lineplot.png") + reports.plot_lines(tpx=stat.tpx, ylabel="Number of TTSs", + linelabel="No. TTSs", filename=args.rn + "_lineplot.png") reports.boxplot(filename=args.rn + "_boxplot.png", matrix=stat.region_matrix, sig_region=stat.sig_DBD, truecounts=stat.counts_dbs.values(), sig_boolean=stat.data["region"]["sig_boolean"], - ylabel="Number of DBS on target regions") + ylabel="Number of TTS on target regions") reports.gen_html_regiontest() t3 = time.time() diff --git a/rgt/tdf/RNADNABindingSet.py b/rgt/tdf/RNADNABindingSet.py index 40e41540d..db16a38cd 100644 --- a/rgt/tdf/RNADNABindingSet.py +++ b/rgt/tdf/RNADNABindingSet.py @@ -497,10 +497,10 @@ def merge_rbs(self, rbss=None, rm_duplicate=False, asgene_organism=None, region_ self.merged_dict[r].remove_duplicates() if cutoff: - if cutoff > 1: + if cutoff >= 1: ccf = int(cutoff) else: - ccf = int(cutoff / 100 * len(region_set)) + ccf = int(cutoff * len(region_set)) # print(len(self.sequences)) # print(ccf) for r in self.merged_dict: diff --git a/rgt/tdf/Report.py b/rgt/tdf/Report.py index 39532863e..3926b2278 100644 --- a/rgt/tdf/Report.py +++ b/rgt/tdf/Report.py @@ -551,10 +551,10 @@ def gen_html_promotertest(self): "Target Promoter", None, "Non-target Promoter", None, "Statistics", None, "Target Promoter", None, "Non-target Promoter", None, "Statistics", None], [" ", " ", - "with DBS", "without DBS", "with DBS", "without DBS", "OR", "p-value", - "No. DBSs", "Other DBSs", "No. DBSs", "Other DBSs", "OR", "p-value"]] + "with TTS", "without TTS", "with TTS", "without TTS", "OR", "p-value", + "No. TTSs", "Other TTSs", "No. TTSs", "Other TTSs", "OR", "p-value"]] header_titles = [["", "", "Statistics on promoter level", None, None, None, None, None, - "Statistics on DBS level", None, None, None, None, None], + "Statistics on TTS level", None, None, None, None, None], ["Rank of the talbe", "DNA Binding Domain which is the functional region on RNA.", "Promoters of the differential expression genes.", None, @@ -562,18 +562,18 @@ def gen_html_promotertest(self): "Statistics based on promoters", None, "Promoters of the differential expression genes.", None, "Promoters of the non-differential expression genes.", None, - "Statistics based on DBSs", None], + "Statistics based on TTSs", None], ["", "", - "Number of target promoters which contain DBSs (DNA Binding Sites).", - "Number of target promoters which don't contain DBSs (DNA Binding Sites).", - "Number of non-target promoters which contain DBSs (DNA Binding Sites).", - "Number of non-target promoters which don't contain DBSs (DNA Binding Sites).", + "Number of target promoters which contain TTSs.", + "Number of target promoters which don't contain TTSs.", + "Number of non-target promoters which contain TTSs.", + "Number of non-target promoters which don't contain TTSs.", "Odds Ratio", "P-value", - "Number of DBSs found in the target promoters.", - "Number of DBSs not found in the target promoters.", - "Number of DBSs found in the non-target promoters.", - "Number of DBSs not found in the non-target promoters.", + "Number of TTSs found in the target promoters.", + "Number of TTSs not found in the target promoters.", + "Number of TTSs found in the non-target promoters.", + "Number of TTSs not found in the non-target promoters.", "Odds Ratio", "P-value"] ] border_list = [" style=\"border-right:1pt solid gray\"", @@ -586,7 +586,7 @@ def gen_html_promotertest(self): " style=\"border-right:1pt solid gray\""] else: header_list = [["#", "DBD", "Target Promoter", None, "Non-target Promoter", None, "Statistics", None, "Autobinding"], - [" ", " ", "with DBS", "without DBS", "with DBS", "without DBS", "OR", "p", "Number"]] + [" ", " ", "with TTS", "without TTS", "with TTS", "without TTS", "OR", "p", "Number"]] header_titles = [["Rank of the talbe", "DNA Binding Domain which is the functional region on RNA.", "Promoters of the differential expression genes.", None, @@ -594,10 +594,10 @@ def gen_html_promotertest(self): "Statistics based on promoters", None, "The RNA regions which bind to themselves"], ["", "", - "Number of target promoters which contain DBSs (DNA Binding Sites).", - "Number of target promoters which don't contain DBSs (DNA Binding Sites).", - "Number of non-target promoters which contain DBSs (DNA Binding Sites).", - "Number of non-target promoters which don't contain DBSs (DNA Binding Sites).", + "Number of target promoters which contain TTSs.", + "Number of target promoters which don't contain TTSs.", + "Number of non-target promoters which contain TTSs.", + "Number of non-target promoters which don't contain TTSs.", "Odds Ratio", "P-value", "Number"] ] border_list = ["style=\"border-right:1pt solid gray\"", @@ -659,8 +659,8 @@ def gen_html_promotertest(self): header_titles=header_titles, border_list=border_list, sortable=True) html.add_heading("Notes") html.add_list(["DBD stands for functional DNA Binding Domain on RNA.", - "RBS stands for RNA Binding Site on RNA.", - "DBS stands for DNA Binding Site on DNA."]) + "TFO stands for triplex forming oligonucleotide.", + "TTS stands for triplex target DNA site."]) #### html.add_fixed_rank_sortable() html.write(os.path.join(self.pars.o, "index.html")) @@ -680,7 +680,7 @@ def gen_html_promotertest(self): else: score_header = ["Fold Change Score"] - header_list = ["#", "Promoter", "Gene", "DBSs counts", "DBS coverage"] + header_list = ["#", "Promoter", "Gene", "TTSs counts", "TTS coverage"] header_list += score_header header_list += ["Sum of Ranks"] header_titles = ["", "Target promoters", "Gene symbol", @@ -691,7 +691,7 @@ def gen_html_promotertest(self): header_titles += ["Sum up the ranks from left-hand side columns"] else: - header_list = ["#", "Promoter", "Gene", "DBSs Count", "DBS coverage", "Sum of Ranks"] + header_list = ["#", "Promoter", "Gene", "TTSs Count", "TTS coverage", "Sum of Ranks"] header_titles = ["", "Target promoters", "Gene symbol", "Number of DNA Binding sites locating within the promoter", @@ -798,7 +798,7 @@ def gen_html_promotertest(self): ["known_only", "-known_only", str(self.pars.known_only)], ["Promoter length", "-pl", str(self.pars.pl)], ["Alpha level for rejection p value", "-a", str(self.pars.a)], - ["Cut off value for filtering out the low counts of DBSs", "-ccf", str(self.pars.ccf)], + ["Cut off value for filtering out the DBD with low counts of triplexes", "-ccf", str(self.pars.ccf)], ["Remove temporary files", "-rt", str(self.pars.rt)], # ["Input file for RNA accecibility", "-ac", str(self.pars.ac)], # ["Cut off value for RNA accecibility", "-accf", str(self.pars.accf)], @@ -843,7 +843,7 @@ def gen_html_genes(self, align = 50, nonDE=False): # score_header = ["Fold_change", "Filtered"] # else: score_header = ["Fold Change Score"] - header_listp = ["#", "Promoter", "Gene", "DBSs Count", "DBS coverage"] + header_listp = ["#", "Promoter", "Gene", "TTSs Count", "TTS coverage"] header_listp += score_header header_listp += ["Sum of Ranks"] @@ -855,7 +855,7 @@ def gen_html_genes(self, align = 50, nonDE=False): header_titlesp += ["Sum up the ranks from left-hand side columns"] else: - header_listp = ["#", "Promoter", "Gene", "DBSs Count", "DBS coverage", "Sum of Ranks"] + header_listp = ["#", "Promoter", "Gene", "TTSs Count", "TTS coverage", "Sum of Ranks"] header_titlesp = ["", "Target promoters", "Gene symbol", "Number of DNA Binding sites locating within the promoter", @@ -924,16 +924,16 @@ def gen_html_genes(self, align = 50, nonDE=False): html.add_zebra_table(header_listp, col_size_list, type_list, data_table, align=align, cell_align="left", header_titles=header_titlesp, border_list=None, sortable=True, clean=True) html.add_heading("Notes") - html.add_list(["DBS stands for DNA Binding Site on DNA.", - "DBS coverage is the proportion of the promoter where has potential to form triple helices with the given RNA."]) + html.add_list(["TTS stands for triplex target DNA site.", + "TTS coverage is the proportion of the promoter where has potential to form triple helices with the given RNA."]) html.add_fixed_rank_sortable() html.write(os.path.join(self.pars.o, "promoters.html")) ############################ # Subpages for promoter centered page # promoters_dbds.html - header_sub = ["#", "RBS", "DBS", "Strand", "Score", "Motif", "Orientation", "Sequence"] - header_titles = ["", "RNA Binding Site", "DNA Binding Site", "Strand of DBS on DNA", + header_sub = ["#", "TFO", "TTS", "Strand", "Score", "Motif", "Orientation", "Sequence"] + header_titles = ["", "RNA Binding Site", "DNA Binding Site", "Strand of TTS on DNA", "Score of binding event", "Motif of binding by triple helix rule", "Orientation of interaction between DNA and RNA. 'P'- Parallel; 'A'-Antiparallel", "Binding Sequence between DNA and RNA"] header_list = header_sub @@ -1040,8 +1040,8 @@ def gen_html_genes(self, align = 50, nonDE=False): html.add_zebra_table(header_listp, col_size_list, type_list, data_table, align=align, cell_align="left", header_titles=header_titlesp, border_list=None, sortable=True, clean=True) html.add_heading("Notes") - html.add_list(["DBS stands for DNA Binding Site on DNA.", - "DBS coverage is the proportion of the promoter where has potential to form triple helices with the given RNA."]) + html.add_list(["TTS stands for triplex target DNA site.", + "TTS coverage is the proportion of the promoter where has potential to form triple helices with the given RNA."]) html.add_fixed_rank_sortable() html.write(os.path.join(self.pars.o, "spromoters.html")) @@ -1128,16 +1128,16 @@ def gen_html_regiontest(self): if self.pars.showdbs: header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics", "Target Regions", "Non-target Regions", None, "Statistics"], - ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "p-value", - "NO. DBSs", "NO. DBSs (average)", "s.d.", "p-value"]] + ["", "", "with TTS", "without TTS", "with TTS (average)", "s.d.", "p-value", + "NO. TTSs", "NO. TTSs (average)", "s.d.", "p-value"]] header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None, "Regions from randomization", None, "Statistics based on target regions", "Given target regions on DNA", "Regions from randomization", None, "Statistics based on DNA Binding Sites"], ["", "", - "Number of target regions with DBS binding", - "Number of target regions without DBS binding", - "Average number of regions from randomization with DBS binding", + "Number of target regions with triplex binding", + "Number of target regions without triplex binding", + "Average number of regions from randomization with triplex binding", "Standard deviation", "P value", "Number of related DNA Binding Sites binding to target regions", "Average number of DNA Binding Sites binding to random regions", @@ -1152,15 +1152,15 @@ def gen_html_regiontest(self): " style=\"border-right:1pt solid gray\""] else: header_list = [["#", "DBD", "Target Regions", None, "Non-target Regions", None, "Statistics", None, "Autobinding"], - ["", "", "with DBS", "without DBS", "with DBS (average)", "s.d.", "p-value", + ["", "", "with TTS", "without TTS", "with TTS (average)", "s.d.", "p-value", "z-score", "Number"]] header_titles = [["Rank", "DNA Binding Domain", "Given target regions on DNA", None, "Regions from randomization", None, "Statistics based on target regions", None, "Regions bind to themselves"], ["", "", - "Number of target regions with DBS binding", - "Number of target regions without DBS binding", - "Average number of regions from randomization with DBS binding", + "Number of target regions with triplex binding", + "Number of target regions without triplex binding", + "Average number of regions from randomization with triplex binding", "Standard deviation", "P value", "Z-score", ""]] border_list = [" style=\"border-right:1pt solid gray\"", " style=\"border-right:1pt solid gray\"", "", @@ -1212,7 +1212,7 @@ def gen_html_regiontest(self): html.add_list(["RNA name: " + self.pars.rn, "Randomization is performed for " + str(self.pars.n) + " times.", "DBD stands for DNA Binding Domain on RNA.", - "DBS stands for DNA Binding Site on DNA."]) + "TTS stands for triplex target site."]) html.add_fixed_rank_sortable() html.write(os.path.join(self.pars.o, "index.html")) @@ -1222,12 +1222,12 @@ def gen_html_regiontest(self): header_list = ["#", "Target Region", "Associated Gene", - "No. of DBSs", - "DBS coverage"] + "No. of TTSs", + "TTS coverage"] header_titles = ["Rank", "Given target regions from BED files", "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)", "Number of DNA Binding Sites locate within the region", - "The proportion of the region covered by DBS binding"] + "The proportion of the region covered by triplex binding"] ######################################################### # dbd_region.html @@ -1264,25 +1264,25 @@ def gen_html_regiontest(self): fig_rpath="../style", RGT_header=False, other_logo="TDF", homepage="../index.html") if self.pars.score: - header_list = ["#", "Target region", "Associated Gene", "DBSs Count", "Norm. DBSs", - "DBS coverage", "Score", "Sum of ranks"] + header_list = ["#", "Target region", "Associated Gene", "TTSs Count", "Norm. TTSs", + "TTS coverage", "Score", "Sum of ranks"] header_titles = ["Rank", "Target regions loaded from the given BED file", "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)", "Number of DNA Binding Sites within the region", "Normalized Number of DNA Binding Sites within the region (per 1000 bp)", - "The proportion of the region covered by DBS binding", + "The proportion of the region covered by TTS binding", "Scores from BED file", "Sum of all the left-hand-side ranks"] else: - header_list = ["#", "Target region", "Associated Gene", "DBSs Count", "Norm. DBSs", - "DBS coverage", "Sum of ranks"] + header_list = ["#", "Target region", "Associated Gene", "TTSs Count", "Norm. TTSs", + "TTS coverage", "Sum of ranks"] header_titles = ["Rank", "Target regions loaded from the given BED file", "Associated genes which is overlapping with the given region or close to it (less than 50000 bp)", "Number of DNA Binding Sites within the region", "Normalized Number of DNA Binding Sites within the region (per 1000 bp)", - "The proportion of the region covered by DBS binding", + "The proportion of the region covered by triplex binding", "Sum of all the left-hand-side ranks"] html.add_heading("Target Regions") data_table = [] @@ -1414,16 +1414,16 @@ def gen_html_regiontest(self): html.add_zebra_table(header_list, col_size_list, type_list, data_table, align=align, cell_align="left", header_titles=header_titles, border_list=None, sortable=True, clean=True) html.add_heading("Notes") - html.add_list(["DBS stands for DNA Binding Site on DNA.", - "DBS coverage is the proportion of the region where has potential to form triple helices with the given RNA."]) + html.add_list(["TTS stands for triplex target DNA site.", + "TTS coverage is the proportion of the region where has potential to form triple helices with the given RNA."]) html.add_fixed_rank_sortable() html.write(os.path.join(self.pars.o, "starget_regions.html")) ############################ # Subpages for targeted region centered page # region_dbs.html - header_list = ["RBS", "DBS", "Strand", "Score", "Motif", "Orientation", "Sequence"] - header_titles = ["", "RNA Binding Site", "DNA Binding Site", "Strand of DBS on DNA", + header_list = ["TFO", "TTS", "Strand", "Score", "Motif", "Orientation", "Sequence"] + header_titles = ["", "RNA Binding Site", "DNA Binding Site", "Strand of TTS on DNA", "Score of binding event", "Motif of binding by triple helix rule", "Orientation of interaction between DNA and RNA. 'P'- Parallel; 'A'-Antiparallel", "Binding Sequence between DNA and RNA"] @@ -1496,7 +1496,7 @@ def gen_html_regiontest(self): ["Organism", "-organism", self.pars.organism], ["Number of repitetion of andomization", "-n", str(self.pars.n)], ["Alpha level for rejection p value", "-a", str(self.pars.a)], - ["Cut off value for filtering out the low counts of DBSs", "-ccf", str(self.pars.ccf)], + ["Cut off value for filtering out the DBD with low counts of TTSs", "-ccf", str(self.pars.ccf)], ["Remove temporary files", "-rt", str(self.pars.rt)], ["Input BED file for masking in randomization", "-f", str(self.pars.f)], # ["Input file for RNA accecibility", "-ac", str(self.pars.ac)], diff --git a/rgt/tdf/Statistics.py b/rgt/tdf/Statistics.py index ef28d7b8b..1e0533af8 100644 --- a/rgt/tdf/Statistics.py +++ b/rgt/tdf/Statistics.py @@ -43,7 +43,7 @@ def __init__(self, pars): "RA_A": 0, "RA_G": 0, "YP_C": 0, "YP_T": 0, "uniq_MA_G": 0, "uniq_MA_T": 0, "uniq_MP_G": 0, "uniq_MP_T": 0, "uniq_RA_A": 0, "uniq_RA_G": 0, "uniq_YP_C": 0, "uniq_YP_T": 0, - "target_in_trans": 0, "traget_in_cis": 0, "target_local": 0, + "target_in_trans": 0, "target_in_cis": 0, "target_local": 0, "background_in_trans": 0, "background_in_cis": 0, "background_local": 0} def count_frequency_promoters(self, target_regions, background, file_tpx_de, file_tpx_nde): @@ -284,7 +284,7 @@ def write_stat(self, filename): "MA_G", "MA_T", "MP_G", "MP_T", "RA_A", "RA_G", "YP_C", "YP_T", "uniq_MA_G", "uniq_MA_T", "uniq_MP_G", "uniq_MP_T", "uniq_RA_A", "uniq_RA_G", "uniq_YP_C", "uniq_YP_T", - "target_in_trans", "traget_in_cis", "target_local", + "target_in_trans", "target_in_cis", "target_local", "background_in_trans", "background_in_cis", "background_local"] with open(filename, "w") as f: @@ -441,5 +441,5 @@ def target_stat(self, target_regions, tpx, tpxf): def distance_distribution(self, tpx): dis_count = tpx.distance_distribution() self.stat["target_in_trans"] = dis_count["in_trans"] - self.stat["traget_in_cis"] = dis_count["in_cis"] + self.stat["target_in_cis"] = dis_count["in_cis"] self.stat["target_local"] = dis_count["local"] \ No newline at end of file diff --git a/rgt/tdf/Triplexes.py b/rgt/tdf/Triplexes.py index 4274aa381..4409c0ab9 100644 --- a/rgt/tdf/Triplexes.py +++ b/rgt/tdf/Triplexes.py @@ -77,7 +77,7 @@ def __init__(self, organism, pars): self.pars = pars self.outdir = pars.o - def search_triplex(self, target_regions, prefix, remove_temp=False): + def search_triplex(self, target_regions, prefix, summary_file=False, remove_temp=False): # print(" \tRunning Triplexator...") rna_fasta = os.path.join(self.outdir, "rna_temp.fa") dna_fasta = os.path.join(self.outdir, prefix+".fa") @@ -88,7 +88,7 @@ def search_triplex(self, target_regions, prefix, remove_temp=False): run_triplexator(ss=rna_fasta, ds=dna_fasta, output=tpx_file, l=self.l, e=self.e, c=self.c, fr=self.fr, fm=self.fm, - of=self.of, mf=self.mf, rm=self.rm, par=self.pars.par) + of=self.of, mf=self.mf, rm=self.rm, par=self.pars.par, summary_file=summary_file) if remove_temp: os.remove(dna_fasta) @@ -107,7 +107,7 @@ def find_autobinding(self, rbss): self.autobinding.merge_rbs(rbss=rbss, rm_duplicate=False) def get_tpx(self, rna_fasta_file, target_regions, dna_fine_posi, prefix="", remove_temp=False, - autobinding=False): + autobinding=False, summary_file=False): """Given a GenomicRegionSet to run Triplexator and return the RNADNABindingSet""" # Generate FASTA save_sequence(dir=self.outdir, filename="targets_" + prefix + ".fa", @@ -116,13 +116,13 @@ def get_tpx(self, rna_fasta_file, target_regions, dna_fine_posi, prefix="", remo run_triplexator(ss=rna_fasta_file, ds=os.path.join(self.outdir, "targets_" + prefix + ".fa"), output=os.path.join(self.outdir, "targets_" + prefix + ".tpx"), l=self.l, e=self.e, c=self.c, fr=self.fr, fm=self.fm, of=self.of, - mf=self.mf, rm=self.rm, par=self.pars.par) + mf=self.mf, rm=self.rm, par=self.pars.par, summary_file=summary_file) # Autobinding if autobinding: run_triplexator(ss=rna_fasta_file, ds=os.path.join(self.outdir, "targets_" + prefix + ".fa"), output=os.path.join(self.outdir, "autobinding_" + prefix + ".txp"), l=self.l, e=self.e, c=self.c, fr=self.fr, fm=self.fm, of=self.of, - mf=self.mf, rm=self.rm, par=self.pars.par + "_auto-binding-file") + mf=self.mf, rm=self.rm, par=self.pars.par + "_auto-binding-file", summary_file=False) # Read txp tpx = RNADNABindingSet("targets") tpx.read_tpx(os.path.join(self.outdir, "targets_" + prefix + ".tpx"), dna_fine_posi=dna_fine_posi, seq=True) diff --git a/rgt/tdf/triplexTools.py b/rgt/tdf/triplexTools.py index f3539186a..38044a9fb 100644 --- a/rgt/tdf/triplexTools.py +++ b/rgt/tdf/triplexTools.py @@ -46,7 +46,7 @@ "associated_gene", "expression", "loci", "autobinding", "MA_G","MA_T","MP_G","MP_T","RA_A","RA_G","YP_C","YP_T", "uniq_MA_G", "uniq_MA_T", "uniq_MP_G", "uniq_MP_T", "uniq_RA_A", "uniq_RA_G", "uniq_YP_C", "uniq_YP_T", - "target_in_trans", "traget_in_cis", "target_local", + "target_in_trans", "target_in_cis", "target_local", "background_in_trans", "background_in_cis", "background_local"] # "Mix_Antiparallel_A", "Mix_Antiparallel_G", "Mix_Antiparallel_T", @@ -577,7 +577,7 @@ def find_triplex(rna_fasta, dna_region, temp, organism, l, e, dna_fine_posi, gen return txp -def run_triplexator(ss, ds, output, l=None, e=None, c=None, fr=None, fm=None, of=None, mf=None, rm=None, par="", autobinding=None): +def run_triplexator(ss, ds, output, l=None, e=None, c=None, fr=None, fm=None, of=None, mf=None, rm=None, par="", autobinding=None, summary_file=False): """Perform Triplexator""" #triplexator_path = check_triplexator_path() # triplexator -ss -ds -l 15 -e 20 -c 2 -fr off -fm 0 -of 1 -rm @@ -600,7 +600,7 @@ def run_triplexator(ss, ds, output, l=None, e=None, c=None, fr=None, fm=None, of if of: arguments += "-of "+str(of)+" " if mf: arguments += "-mf " if rm: arguments += "-rm "+str(rm)+" " - # arguments += "--bit-parallel " + arguments += "--bit-parallel -g 0 " if par != "": par = par.replace('_'," ") par = "-" + par @@ -617,7 +617,8 @@ def run_triplexator(ss, ds, output, l=None, e=None, c=None, fr=None, fm=None, of arg_ptr[i + 1] = s # print(arg_strings) triplex_lib.pyTriplexator(len(arg_strings) + 1, arg_ptr) - silentremove(os.path.join(output + ".summary")) + if not summary_file: + silentremove(os.path.join(output + ".summary")) silentremove(os.path.join(output + ".log")) diff --git a/rgt/viz/Main.py b/rgt/viz/Main.py index 78573714e..417fc0767 100644 --- a/rgt/viz/Main.py +++ b/rgt/viz/Main.py @@ -723,7 +723,7 @@ def main(): boxplot.color_map(colorby=args.c, definedinEM=args.color) boxplot.plot(title=args.t, logT=args.nlog, scol=args.scol, ylim=args.ylim, pw=args.pw, ph=args.ph) if args.table: - boxplot.print_table(directory=args.o, folder=args.t) + boxplot.print_plot_table(directory=args.o, folder=args.t) output(f=boxplot.fig, directory=args.o, folder=args.t, filename="boxplot", extra=matplotlib.pyplot.gci(), pdf=True, show=args.show) diff --git a/rgt/viz/boxplot.py b/rgt/viz/boxplot.py index 6cb93400e..612f4249f 100644 --- a/rgt/viz/boxplot.py +++ b/rgt/viz/boxplot.py @@ -130,15 +130,15 @@ def tables_for_plot(self): def print_plot_table(self, directory, folder): for i, bed in enumerate(self.tableDict.keys()): - # table = [] - # header = ["chrom", "initial", "final"] - # for rp in self.reads: - # header.append(os.path.basename(rp)) - # table.append(header) - # for j, re in enumerate(self.beds[i]): - # table.append([re.chrom, re.initial, re.final] + self.tableDict[bed][j].tolist()) + table = [] + header = ["loci"] + for rp in self.reads: + header.append(os.path.basename(rp)) + table.append(header) + for j, re in enumerate(self.beds[i]): + table.append(["_".join([re.chrom, str(re.initial), str(re.final)])] + self.tableDict[bed][j].tolist()) # output_array(table, directory, folder, filename="table_" + bed + ".txt") - output_array(self.tableDict[bed], directory, folder, filename="table_" + bed + ".txt") + output_array(table, directory, folder, filename="table_" + bed + ".txt") def group_tags(self, groupby, sortby, colorby): """Generate the tags for the grouping of plot @@ -223,24 +223,24 @@ def color_map(self, colorby, definedinEM): self.colors = colormap(self.exps, colorby, definedinEM) def print_table(self, directory, folder): - - # self.printtable = OrderedDict() - table = [] - maxn = 0 - # table.append(["#group_tag", "sort_tag", "color_tag", "Signals"]) - for i, g in enumerate(self.group_tags): - for k, a in enumerate(self.sort_tags): - for j, c in enumerate(self.color_tags): - table.append([g, a, c] + [str(x) for x in self.sortDict[g][a][c]]) - c = len(self.sortDict[g][a][c]) + 3 - if c > maxn: maxn = c - for i, t in enumerate(table): - if len(t) < maxn: - table[i] = t + ["n.a."] * (maxn - len(t)) + self.print_plot_table(directory,folder) + # # self.printtable = OrderedDict() + # table = [] + # maxn = 0 + # # table.append(["#group_tag", "sort_tag", "color_tag", "Signals"]) + # for i, g in enumerate(self.group_tags): + # for k, a in enumerate(self.sort_tags): + # for j, c in enumerate(self.color_tags): + # table.append([g, a, c] + [str(x) for x in self.sortDict[g][a][c]]) + # c = len(self.sortDict[g][a][c]) + 3 + # if c > maxn: maxn = c + # for i, t in enumerate(table): + # if len(t) < maxn: + # table[i] = t + ["n.a."] * (maxn - len(t)) # print - output_array(numpy.array([list(x) for x in zip(*table)]), directory, folder, filename="output_table.txt") + # output_array(numpy.array([list(x) for x in zip(*table)]), directory, folder, filename="output_table.txt") def plot(self, title, scol, logT=False, ylim=False, pw=3, ph=4): """ Return boxplot from the given tables. @@ -308,7 +308,7 @@ def plot(self, title, scol, logT=False, ylim=False, pw=3, ph=4): self.xtickrotation = 70 self.xtickalign = "right" for k, c in enumerate(self.sortDict[g][a].keys()): - if not self.sortDict[g][a][c]: # When there is no matching data, skip it + if not numpy.any(self.sortDict[g][a][c]): # When there is no matching data, skip it continue else: if self.df: From 09a0cb96b9b5941f57cdc2c75226740cb0c45bfc Mon Sep 17 00:00:00 2001 From: jovesus Date: Mon, 4 Feb 2019 16:21:41 +0100 Subject: [PATCH 4/6] modified: data/setupGenomicData.py modified: rgt/tdf/Main.py modified: test/tdf.sh --- data/setupGenomicData.py | 7 +++--- rgt/tdf/Main.py | 48 ++++++++++++++++++++-------------------- test/tdf.sh | 13 +++++------ 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/data/setupGenomicData.py b/data/setupGenomicData.py index f6dd957a1..cdc128d19 100644 --- a/data/setupGenomicData.py +++ b/data/setupGenomicData.py @@ -242,6 +242,7 @@ def download(url, prefix, output=None): system("ln -s " + options.mm9_gtf_path + " " + gtf_output_file_name) print("OK") else: + gtf_url = gencode_url + "Gencode_mouse/release_M1/gencode.vM1.annotation.gtf.gz" gtf_output_file_name_gz = path.join(output_location, "gencode.vM1.annotation.gtf.gz") if path.isfile(gtf_output_file_name_gz): remove(gtf_output_file_name_gz) @@ -285,14 +286,14 @@ def download(url, prefix, output=None): output_genome_file.close() # Fetching GTF - gtf_output_file_name = path.join(output_location, "gencode.vM11.annotation.gtf") + gtf_output_file_name = path.join(output_location, "gencode.vM20.annotation.gtf") if options.mm10_gtf_path: print("Creating symbolic link to MM10 GTF") system("ln -s " + options.mm10_gtf_path + " " + gtf_output_file_name) print("OK") else: - gtf_url = gencode_url + "Gencode_mouse/release_M11/gencode.vM11.annotation.gtf.gz" - gtf_output_file_name_gz = path.join(output_location, "gencode.vM11.annotation.gtf.gz") + gtf_url = gencode_url + "Gencode_mouse/release_M20/gencode.vM20.annotation.gtf.gz" + gtf_output_file_name_gz = path.join(output_location, "gencode.vM20.annotation.gtf.gz") if path.isfile(gtf_output_file_name_gz): remove(gtf_output_file_name_gz) print("Downloading MM10 GTF (gene annotation)") download(gtf_url, output_location) diff --git a/rgt/tdf/Main.py b/rgt/tdf/Main.py index 215d9ad16..8f620c2e1 100644 --- a/rgt/tdf/Main.py +++ b/rgt/tdf/Main.py @@ -36,8 +36,8 @@ Author: Joseph C.C. Kuo -Triplexator -https://github.com/zbarni/triplexator +Triplexes +https://github.com/CostaLab/Triplexes Author: Barna Zajzon """ @@ -65,7 +65,7 @@ def main(): parser_promotertest.add_argument('-bed', default=False, metavar=' ', help="Input BED file of the promoter regions of target genes") parser_promotertest.add_argument('-bg', default=False, metavar=' ', help="Input BED file of the promoter regions of background genes") parser_promotertest.add_argument('-o', metavar=' ', help="Output directory name for all the results") - parser_promotertest.add_argument('-t', metavar=' ', default=False, help="Define the title name for the results under the Output name. (default: %(default)s)") + parser_promotertest.add_argument('-t', metavar=' ', default=False, help="Define the title name for the results under the Output name. (default is RNA name)") parser_promotertest.add_argument('-organism', metavar=' ', help='Define the organism') parser_promotertest.add_argument('-gtf', metavar=' ', default=None, help='Define the GTF file for annotation (optional)') @@ -87,7 +87,7 @@ def main(): parser_promotertest.add_argument('-filter_havana', type=str, default="F", metavar=' ', help="Apply filtering to remove HAVANA entries.") parser_promotertest.add_argument('-protein_coding', type=str, default="F", metavar=' ', help="Apply filtering to get only protein coding genes.") parser_promotertest.add_argument('-known_only', type=str, default="F", metavar=' ', help="Apply filtering to get only known genes.") - parser_promotertest.add_argument('-dump', action="store_true", default=False, help="Only dump the experimental file and leave the program.") + parser_promotertest.add_argument('-dump', action="store_true", default=False, help="Only dump the preprocessed file and leave the program.") parser_promotertest.add_argument('-rnaexp', type=str, default=None, metavar=' ', help="Given a file with RNA name and the expression value") parser_promotertest.add_argument('-nofile', action="store_true", default=False, help="Don't save any files in the output folder, except the statistics.") @@ -96,10 +96,10 @@ def main(): parser_promotertest.add_argument('-c', type=int, default=2, metavar=' ', help="[Triplexes] Sets the tolerated number of consecutive errors with respect to the canonical triplex rules as such were found to greatly destabilize triplexes in vitro (default: %(default)s)") parser_promotertest.add_argument('-fr', type=str, default="off", metavar=' ', help="[Triplexes] Activates the filtering of low complexity regions and repeats in the sequence data (default: %(default)s)") parser_promotertest.add_argument('-fm', type=int, default=0, metavar=' ', help="[Triplexes] Method to quickly discard non-hits (default: %(default)s).'0' = greedy approach; '1' = q-gram filtering.") - parser_promotertest.add_argument('-of', type=int, default=1, metavar=' ', help="[Triplexes] Define output formats of Triplexator (default: %(default)s)") + parser_promotertest.add_argument('-of', type=int, default=1, metavar=' ', help="[Triplexes] Define output formats of Triplexes (default: %(default)s)") parser_promotertest.add_argument('-mf', action="store_true", default=False, help="[Triplexes] Merge overlapping features into a cluster and report the spanning region.") parser_promotertest.add_argument('-rm', type=int, default=2, metavar=' ', help="[Triplexes] Set the multiprocessing") - parser_promotertest.add_argument('-par', type=str, default="", metavar=' ', help="[Triplexes] Define other parameters for Triplexator") + parser_promotertest.add_argument('-par', type=str, default="", metavar=' ', help="[Triplexes] Define other parameters for Triplexes") ################### Genomic Region Test ########################################## h_region = "Genomic region test evaluates the association between the given lncRNA to the target regions by randomization." @@ -109,7 +109,7 @@ def main(): parser_randomtest.add_argument('-rn', type=str, default=False, metavar=' ', help="Define the RNA name") parser_randomtest.add_argument('-bed', metavar=' ', help="Input BED file for interested regions on DNA") parser_randomtest.add_argument('-o', metavar=' ', help="Output directory name for all the results and temporary files") - parser_randomtest.add_argument('-t', metavar=' ', default=False, help="Define the title name for the results under the Output name. (default: %(default)s)") + parser_randomtest.add_argument('-t', metavar=' ', default=False, help="Define the title name for the results under the Output name. (default is RNA name)") parser_randomtest.add_argument('-n', type=int, default=10000, metavar=' ', help="Number of times for randomization (default: %(default)s)") @@ -135,16 +135,16 @@ def main(): parser_randomtest.add_argument('-c', type=int, default=2, metavar=' ', help="[Triplexes] Sets the tolerated number of consecutive errors with respect to the canonical triplex rules as such were found to greatly destabilize triplexes in vitro (default: %(default)s)") parser_randomtest.add_argument('-fr', type=str, default="off", metavar=' ', help="[Triplexes] Activates the filtering of low complexity regions and repeats in the sequence data (default: %(default)s)") parser_randomtest.add_argument('-fm', type=int, default=0, metavar=' ', help="[Triplexes] Method to quickly discard non-hits (default: %(default)s).'0' = greedy approach; '1' = q-gram filtering.") - parser_randomtest.add_argument('-of', type=int, default=1, metavar=' ', help="[Triplexes] Define output formats of Triplexator (default: %(default)s)") + parser_randomtest.add_argument('-of', type=int, default=1, metavar=' ', help="[Triplexes] Define output formats of Triplexes (default: %(default)s)") parser_randomtest.add_argument('-mf', action="store_true", default=False, help="[Triplexes] Merge overlapping features into a cluster and report the spanning region.") parser_randomtest.add_argument('-rm', type=int, default=2, metavar=' ', help="[Triplexes] Set the multiprocessing") - parser_randomtest.add_argument('-par', type=str, default="", metavar=' ', help="[Triplexes] Define other parameters for Triplexator") + parser_randomtest.add_argument('-par', type=str, default="", metavar=' ', help="[Triplexes] Define other parameters for Triplexes") ########################################################################## - parser_bed2bed = subparsers.add_parser('get_dbss', help="Get DBSs in BED format from the single BED file") + parser_bed2bed = subparsers.add_parser('get_ttss', help="Get TTSs in BED format from the single BED file") parser_bed2bed.add_argument('-i',type=str, metavar=' ', help='Input BED file of the target regions') - parser_bed2bed.add_argument('-dbs',type=str, metavar=' ', help='Output BED file of the DBSs') - parser_bed2bed.add_argument('-rbs',type=str, metavar=' ', help='Output BED file of the RBSs') + parser_bed2bed.add_argument('-tts',type=str, metavar=' ', help='Output BED file of the TTSs') + parser_bed2bed.add_argument('-tfo',type=str, metavar=' ', help='Output BED file of the TFOs') parser_bed2bed.add_argument('-r',type=str, metavar=' ', help='Input FASTA file of the RNA') parser_bed2bed.add_argument('-organism', metavar=' ', help='Define the organism') parser_bed2bed.add_argument('-l', type=int, default=20, metavar=' ', help="[Triplexes] Define the minimum length of triplex (default: %(default)s)") @@ -152,7 +152,7 @@ def main(): parser_bed2bed.add_argument('-c', type=int, default=2, metavar=' ', help="[Triplexes] Sets the tolerated number of consecutive errors with respect to the canonical triplex rules as such were found to greatly destabilize triplexes in vitro (default: %(default)s)") parser_bed2bed.add_argument('-fr', type=str, default="off", metavar=' ', help="[Triplexes] Activates the filtering of low complexity regions and repeats in the sequence data (default: %(default)s)") parser_bed2bed.add_argument('-fm', type=int, default=0, metavar=' ', help="[Triplexes] Method to quickly discard non-hits (default: %(default)s).'0' = greedy approach; '1' = q-gram filtering.") - parser_bed2bed.add_argument('-of', type=int, default=1, metavar=' ', help="[Triplexes] Define output formats of Triplexator (default: %(default)s)") + parser_bed2bed.add_argument('-of', type=int, default=1, metavar=' ', help="[Triplexes] Define output formats of Triplexes (default: %(default)s)") parser_bed2bed.add_argument('-mf', action="store_true", default=False, help="[Triplexes] Merge overlapping features into a cluster and report the spanning region.") parser_bed2bed.add_argument('-rm', type=int, default=0, metavar=' ', help="[Triplexes] Set the multiprocessing") @@ -162,9 +162,9 @@ def main(): parser_integrate.add_argument('-path',type=str, metavar=' ', help='Define the path of the project.') parser_integrate.add_argument('-exp', action="store_true", default=False, help='Include expression score for ranking.') ########################################################################## - parser_updatehtml = subparsers.add_parser('updatehtml', help="Update the project's html.") - parser_updatehtml.add_argument('-path',type=str, metavar=' ', help='Define the path of the project.') - parser_updatehtml.add_argument('-exp', type=str, metavar=' ', help='Define file with expression data.') + # parser_updatehtml = subparsers.add_parser('updatehtml', help="Update the project's html.") + # parser_updatehtml.add_argument('-path',type=str, metavar=' ', help='Define the path of the project.') + # parser_updatehtml.add_argument('-exp', type=str, metavar=' ', help='Define file with expression data.') ################### Parsing the arguments ################################ if len(sys.argv) == 1: @@ -244,14 +244,14 @@ def main(): #################################################################################### ######### updatehtml - elif args.mode == "updatehtml": - for item in os.listdir(args.path): - pro = os.path.join(args.path, item, "profile.txt") - if os.path.isfile(pro): update_profile(dirpath=os.path.join(args.path, item), - expression=args.exp) - revise_index(root=args.path) - generate_rna_exp_pv_table(root=args.path, multi_corr=True) - sys.exit(0) + # elif args.mode == "updatehtml": + # for item in os.listdir(args.path): + # pro = os.path.join(args.path, item, "profile.txt") + # if os.path.isfile(pro): update_profile(dirpath=os.path.join(args.path, item), + # expression=args.exp) + # revise_index(root=args.path) + # generate_rna_exp_pv_table(root=args.path, multi_corr=True) + # sys.exit(0) #################################################################################### ######### get_dbss diff --git a/test/tdf.sh b/test/tdf.sh index df1429c95..df85bad34 100755 --- a/test/tdf.sh +++ b/test/tdf.sh @@ -18,20 +18,19 @@ then echo "$file found." else echo "$file not found." -wget -qO- -O TDF_examples.zip http://costalab.org/files/tdf/TDF_examples.zip && unzip TDF_examples.zip && rm TDF_examples.zip +curl -k -O https://costalab.ukaachen.de/open_data/TDF/TDF_examples.zip && unzip TDF_examples.zip && rm TDF_examples.zip fi + # Run test script cd ${DIR}/TDF_examples/FENDRR_mm9/ -rgt-TDF promotertest -r FENDRR.fasta -de fendrr_gene_list.txt -organism mm9 -rn FENDRR -o promoter_test/ -l 15 -rgt-TDF promotertest -r FENDRR.fasta -de fendrr_gene_list_fold_change.txt -score -organism mm9 -rn FENDRR -o promoter_test -t FENDRR_FC/ -l 15 +#rgt-TDF promotertest -r FENDRR.fasta -de fendrr_gene_list.txt -organism mm9 -rn FENDRR -o promoter_test/ -l 15 +rgt-TDF promotertest -r FENDRR.fasta -de fendrr_gene_list_fold_change.txt -score -organism mm9 -rn FENDRR -o promoter_test -t FENDRR_FC/ -l 20 rgt-TDF integrate -path promoter_test -cd ${DIR}/TDF_examples/TERC_hg19/ - -rgt-TDF regiontest -r terc.fasta -bed terc_peaks.bed -rn TERC -f Nregions_hg19.bed -organism hg19 -l 15 -o genomic_region_test/ -n 10 -mp 5 +cd ${DIR}/TDF_examples/MEG3_hg38/ +rgt-TDF regiontest -r MEG3_sequence.fa -bed MEG3_hg38_CHOP.bed -rn MEG3 -o genomic_region_test -n 100 -organism hg38 -l 14 -mp 5 -ccf 100 rgt-TDF integrate -path genomic_region_test -# rgt-TDF regiontest -r terc.fasta -bed terc_peaks.bed -rn TERC -f Nregions_hg19.bed -organism hg19 -l 8 -o genomic_region_test/ -n 10 -mp 5 echo "********* TDF test completed ****************" From c3089fdd1ba1ffbf94ef058e015c44537d56c496 Mon Sep 17 00:00:00 2001 From: jovesus Date: Tue, 12 Feb 2019 13:29:44 +0100 Subject: [PATCH 5/6] modified: rgt/__version__.py --- rgt/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rgt/__version__.py b/rgt/__version__.py index 1a7d67496..7dee54b67 100644 --- a/rgt/__version__.py +++ b/rgt/__version__.py @@ -1,2 +1,2 @@ 0 -__version__ = "0.11.5" +__version__ = "0.11.6" From 340174b35c7895cb44cd1f64eb809e03e5824fb5 Mon Sep 17 00:00:00 2001 From: jovesus Date: Tue, 12 Feb 2019 14:01:38 +0100 Subject: [PATCH 6/6] modified: setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 928ea2420..203a32f3f 100644 --- a/setup.py +++ b/setup.py @@ -137,7 +137,7 @@ def find_version(*file_paths): "TDF": ( "rgt-TDF", "rgt.tdf.Main:main", - ["matplotlib>=1.1.0", "natsort"], + ["matplotlib>=1.1.0", "natsort", "pyBigWig==0.3.12"], [] ) }