Skip to content

Commit

Permalink
#3: min. complexity
Browse files: browse the repository at this point in the history
  • Loading branch information
Vyacheslav Brover committed Feb 19, 2025
1 parent 6d1a403 commit 370339b
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions genetics/marker2qual.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,32 @@
#!/bin/bash --noprofile
# NOTE(review): this listing is a diff view whose +/- markers were lost; the
# page header reports "7 additions & 5 deletions". Where two near-identical
# lines follow each other, the first is presumably the pre-commit line and the
# second its post-commit replacement. The listing is not runnable as shown.
#
# Purpose (from the usage text below): print a good quality subset of
# eukaryotic marker proteins created by tblastn2marker_euk.sh.

# Directory of this script; helper tools are invoked relative to it.
THIS=$( dirname $0 )
source $THIS/../bash_common.sh
# Argument-count check, pre-commit version (3 arguments):
if [ $# -ne 3 ]; then
# Argument-count check, post-commit version (4 arguments):
if [ $# -ne 4 ]; then
echo "Print a good quality subset of eukaryotic marker proteins created by tblastn2marker_euk.sh"
echo "#1: marker proteins (FASTA)"
echo "#2: min. score to length ratio"
# Pre-commit usage line for argument 3:
echo "#3: output uniKernel file | ''"
# Post-commit usage lines: min. complexity becomes argument 3 and the output
# file moves to argument 4.
echo "#3: min. complexity"
echo "#4: output uniKernel file | ''"
exit 1
fi
# M: marker protein FASTA file; T: threshold applied to the score/length ratio
# computed below.
M=$1
T=$2
# Pre-commit: UNI (output uniKernel file, may be empty) came from argument 3.
UNI=$3
# Post-commit: COMPL is forwarded to filterFasta as -complexity_min; UNI comes
# from argument 4.
COMPL=$3
UNI=$4


# Temp file created by mktemp; the $TMP.* files written below share its name as
# a prefix, so the final "rm $TMP*" removes all of them.
TMP=$( mktemp )


# From each FASTA header line keep space-separated fields 1 and 7, drop the
# leading '>', and turn " score=" into a tab, giving "<id><TAB><score>".
grep '^>' $M | cut -f 1,7 -d ' '| sed 's/^>//1' | sed 's/ score=/\t/1' > $TMP.score
# fasta2len output is assumed to be "<id><TAB><length>" per sequence, since the
# awk step below divides by column 2 -- TODO confirm against fasta2len.
$THIS/fasta2len $M > $TMP.len
# Paste both files line by line and print column 1 with column 4 divided by
# column 2 (score / length under the assumption above).
# Pre-commit line:
paste $TMP.len $TMP.score | awk -F '\t' '{OFS="\t"; print $1, $4/$2};' > $TMP.stat
# Post-commit line (differs only in the spaces around '/'):
paste $TMP.len $TMP.score | awk -F '\t' '{OFS="\t"; print $1, $4 / $2};' > $TMP.stat
# Only when a non-empty output file name was given: convert $TMP.stat with
# cols2dm.sh and write the uniKernel output to $UNI. The meaning of the numeric
# and "V2"/-qc arguments is defined by those tools and is not visible here.
if [ "$UNI" ]; then
$THIS/../dm/conversion/cols2dm.sh $TMP.stat 0 5 1 > $TMP.dm
$THIS/../dm/uniKernel $TMP "V2" -qc > $UNI
fi
# Keep the ids (column 1) whose ratio (column 2) is greater than $T; $T is
# spliced directly into the awk program text.
awk '$2 > '$T $TMP.stat | cut -f 1 > $TMP.list
# Pre-commit: filter the input FASTA to the listed ids with the complexity
# minimum hard-coded to 3.
$THIS/filterFasta $M -aa -target $TMP.list -len_min 20 -complexity_min 3
# Post-commit: same call with the complexity minimum taken from $COMPL.
$THIS/filterFasta $M -aa -target $TMP.list -len_min 20 -complexity_min $COMPL


# Remove the temp file and every file derived from its name.
rm $TMP*

0 comments on commit 370339b

Please sign in to comment.