Skip to content

Commit

Permalink
Merge pull request #72 from MicrobialDarkMatter/read_level_methylation
Browse files Browse the repository at this point in the history
Read level methylation
  • Loading branch information
SebastianDall authored Sep 19, 2024
2 parents 273c2e8 + 78026ce commit f20dcfe
Show file tree
Hide file tree
Showing 9 changed files with 647 additions and 346 deletions.
2 changes: 1 addition & 1 deletion nanomotif/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.12"
__version__ = "0.4.13"
2 changes: 2 additions & 0 deletions nanomotif/argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def create_parser():
parser_shared_find_motifs.add_argument("--threshold_valid_coverage", type=int, default=5, help="minimum valid base coverage for a position to be considered. Default: %(default)s")
parser_shared_find_motifs.add_argument("--minimum_kl_divergence", type=float, default=0.05, help="minimum KL-divergence for a position to considered for expansion in motif search. Higher value means less exhaustive, but faster search. Default: %(default)s")
parser_shared_find_motifs.add_argument("--min_motifs_contig", type=int, default=20, help="minimum number of times a motif has to have been oberserved in a contig. Default: %(default)s")
parser_shared_find_motifs.add_argument("--read_level_methylation", action="store_true", help="If specified, methylation is calculated on read level instead of contig level. This is slower but produces more stable motifs.")
parser_shared_find_motifs.add_argument("--min_motif_score", type=float, default=0.2, help="minimum score for a motif to be kept after identification considered valid. Default: %(default)s")
parser_find_motifs = subparsers.add_parser(
'find_motifs',
parents=[parser_positional, parser_optional, parser_shared_find_motifs],
Expand Down
2 changes: 1 addition & 1 deletion nanomotif/bin_consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def convert_motifs_to_regex(motifs):
#contig_motifs = nm.postprocess.remove_sub_motifs(motifs_scored_filt)

contig_motifs = motifs_scored_filt.with_columns(
pl.col("motif").apply(lambda x: nm.seq.regex_to_iupac(x)).alias("motif"),
pl.col("motif").map_elements(lambda x: nm.seq.regex_to_iupac(x), return_dtype = pl.Utf8).alias("motif"),
(pl.col("n_mod") / (pl.col("n_mod") + pl.col("n_nomod"))).alias("mean")
)
bin_motifs = contig_motifs.groupby("bin", "motif", "mod_position", "mod_type") \
Expand Down
16 changes: 16 additions & 0 deletions nanomotif/dataload.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,22 @@ def load_pileup(path: str, threads: int = 1, min_fraction: float = 0):
pileup = pileup.rename({"column_1":"contig", "column_2": "position", "column_4": "mod_type", "column_6": "strand", "column_11": "fraction_mod", "column_10":"Nvalid_cov"})
return Pileup(pileup)

def load_low_coverage_positions(path_pileup: str, threads: int = 1, min_coverage: float = 5):
    """
    Load the low-coverage rows of a headerless, tab-separated pileup file.

    A row is kept only when both conditions hold:
      * column 10 (renamed ``Nvalid_cov``) is at most ``min_coverage``
      * column 10 / (column 10 + column 17) is strictly greater than 0.3

    Args:
        path_pileup: path to the pileup file, lazily scanned with polars.
        threads: accepted for signature compatibility; not read by this function.
        min_coverage: inclusive upper bound on column 10 for a row to be kept.

    Returns:
        A collected polars DataFrame with the columns ``contig``, ``position``,
        ``mod_type``, ``strand``, ``fraction_mod`` and ``Nvalid_cov``.
        ``fraction_mod`` is column 11 divided by 100.
    """
    # NOTE(review): column_17 is presumably modkit's Ndiff count -- confirm
    # against the modkit bedMethyl column layout.
    valid_cov = pl.col("column_10")
    is_low_coverage = valid_cov <= min_coverage
    has_enough_valid_share = (valid_cov / (valid_cov + pl.col("column_17"))) > 0.3

    kept_columns = ["column_1", "column_2", "column_4", "column_6", "column_11", "column_10"]
    final_names = {
        "column_1": "contig",
        "column_2": "position",
        "column_4": "mod_type",
        "column_6": "strand",
        "column_11": "fraction_mod",
        "column_10": "Nvalid_cov",
    }

    # Build the lazy query one step at a time; nothing is read until collect().
    query = pl.scan_csv(path_pileup, separator="\t", has_header=False)
    query = query.filter(is_low_coverage)
    query = query.filter(has_enough_valid_share)
    query = query.select(kept_columns)
    query = query.with_columns(pl.col("column_11") / 100)

    low_coverage_rows = query.collect()
    return low_coverage_rows.rename(final_names)


def load_assembly(path: str):
"""
Load assembly from path to fasta file
Expand Down
Loading

0 comments on commit f20dcfe

Please sign in to comment.