Skip to content

Commit

Permalink
Move get_circular_pattern() to edgecaselib.util
Browse files Browse the repository at this point in the history
  • Loading branch information
LankyCyril committed Oct 8, 2020
1 parent f258ad4 commit 07d40b8
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 16 deletions.
15 changes: 1 addition & 14 deletions edgecaselib/kmerscanner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sys import stdout, stderr
from regex import compile, IGNORECASE
from numpy import zeros, array, cumsum, nan
from multiprocessing import Pool
from edgecaselib.util import get_circular_pattern
from edgecaselib.formats import filter_bam
from edgecaselib.tailchopper import get_cigar_clip_length
from pysam import AlignmentFile, FastxFile
Expand Down Expand Up @@ -67,19 +67,6 @@
)


def get_circular_pattern(motif, repeats=2):
    """Convert motif into a compiled regex matching any circular rotation of it.

    The motif is split into atoms: single characters from "ACGT." or
    bracketed classes such as "[AC]". Every rotation of the atom list is
    repeated `repeats` times, and the results are joined with "|" in rotation
    order, with duplicate rotations dropped. Matching is case-insensitive.

    For "TCGA" with repeats=1 the pattern is r'TCGA|CGAT|GATC|ATCG'; with the
    default repeats=2 each alternative is doubled
    (r'TCGATCGA|CGATCGAT|GATCGATC|ATCGATCG').

    Raises ValueError if the motif is empty or contains anything other than
    the supported atoms, or if `repeats` is less than 1.
    """
    atom_pattern = compile(r'[ACGT.]|\[[ACGT]+\]', flags=IGNORECASE)
    atoms = atom_pattern.findall(motif)
    # An empty motif would otherwise compile to an empty regex that matches
    # at every position of every sequence.
    if (not atoms) or ("".join(atoms) != motif):
        raise ValueError("Could not parse motif: {}".format(motif))
    # Same problem for repeats < 1: every alternative would be an empty string.
    if repeats < 1:
        raise ValueError(
            "Repeats must be a positive integer: {}".format(repeats)
        )
    # De-duplicate while keeping rotation order, so the resulting pattern
    # string is reproducible (iterating a set of strings is not).
    rotations, seen = [], set()
    for i in range(len(atoms)):
        rotation = "".join(atoms[i:] + atoms[:i]) * repeats
        if rotation not in seen:
            seen.add(rotation)
            rotations.append(rotation)
    return compile(r'|'.join(rotations), flags=IGNORECASE)


def get_edge_density(entry, pattern, head_test, tail_test):
"""Calculate density of pattern in head_test or tail_test of read"""
if (entry.query_sequence is None) or (len(entry.query_sequence) == 0):
Expand Down
2 changes: 1 addition & 1 deletion edgecaselib/repeatfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from pysam import AlignmentFile, FastxFile
from os import path
from edgecaselib.util import get_executable, progressbar, revcomp
from edgecaselib.util import get_circular_pattern
from edgecaselib.formats import filter_bam
from edgecaselib.kmerscanner import get_circular_pattern
from functools import lru_cache
from subprocess import check_output
from pandas import read_csv, concat
Expand Down
15 changes: 14 additions & 1 deletion edgecaselib/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from sys import stderr
from regex import compile
from regex import compile, IGNORECASE as REGEX_IGNORECASE
from re import split, search, IGNORECASE
from shutil import which
from os import path, access, X_OK
Expand Down Expand Up @@ -50,6 +50,19 @@ def motif_revcomp(motif, ignorecase=True):
raise ValueError("Unsupported character(s) in motif: {}".format(motif))


def get_circular_pattern(motif, repeats=2):
    """Convert motif into a compiled regex matching any circular rotation of it.

    The motif is tokenized into atoms, each either one character from
    "ACGT." or a bracketed class such as "[AC]". Each rotation of the atom
    sequence is concatenated `repeats` times; the distinct results are joined
    with "|" in rotation order and compiled case-insensitively.

    Example: "TCGA" with repeats=1 gives r'TCGA|CGAT|GATC|ATCG'; with the
    default repeats=2 every alternative is doubled
    (r'TCGATCGA|CGATCGAT|GATCGATC|ATCGATCG').

    Raises ValueError when the motif is empty or cannot be fully tokenized
    into supported atoms, or when `repeats` is less than 1.
    """
    atom_pattern = compile(r'[ACGT.]|\[[ACGT]+\]', flags=REGEX_IGNORECASE)
    atoms = atom_pattern.findall(motif)
    # Reject an empty motif explicitly: it would otherwise produce an empty
    # regex, which matches at every position.
    if (not atoms) or ("".join(atoms) != motif):
        raise ValueError("Could not parse motif: {}".format(motif))
    # repeats < 1 would likewise collapse every alternative to "".
    if repeats < 1:
        raise ValueError(
            "Repeats must be a positive integer: {}".format(repeats)
        )
    # Keep the first occurrence of each rotation, in rotation order; a set
    # would make the order of alternatives vary between interpreter runs.
    unique_rotations = []
    already_added = set()
    for shift in range(len(atoms)):
        rotated = "".join(atoms[shift:] + atoms[:shift]) * repeats
        if rotated in already_added:
            continue
        already_added.add(rotated)
        unique_rotations.append(rotated)
    return compile(r'|'.join(unique_rotations), flags=REGEX_IGNORECASE)


def chromosome_natsort(chrom):
"""Natural order sorting that undestands chr1, 4, chr10, chr14_K*, 7ptel etc"""
keyoder = []
Expand Down

0 comments on commit 07d40b8

Please sign in to comment.