#!/bin/bash --noprofile
# dna_closest.sh
#
# Reconstructed post-image of genetics/dna_closest.sh as described by patch
# a895521f ("Parameter #3: subset"); the patch text had lost its line breaks.
#
# Prints the subject sequence ids (sseqid) of the top 100 plus-strand BLASTN
# hits of a query, ordered by decreasing number of identical positions
# (nident). Optionally restricts the hits to a given list of sequence ids.
#
# Arguments:
#   $1  query DNA sequence (passed to "blastn -query")
#   $2  subject DNA BLAST database (passed to "blastn -db")
#   $3  file with the subset of sequence ids to keep, or '' for no restriction
# Output:
#   up to 100 sseqid values on stdout, one per line
# Exit status:
#   1 if the number of arguments is not 3 (usage is printed to stdout)
THIS=$(dirname "$0")
source "$THIS/../bash_common.sh"
if [ $# -ne 3 ]; then
  echo "Return: top 100 BLASTN hits in top strand"
  echo "#1; query DNA sequence"
  echo "#2: subject DNA BLAST database"
  echo "#3: subset of sequence id's or ''"
  exit 1
fi
QUERY=$1
DB=$2
SUBSET=$3


TMP=$( mktemp )
#comment $TMP


# Unique "sseqid <TAB> nident" lines.
# Sorted bytewise (LC_ALL=C) so that the order is exactly the one join
# expects below; a locale-dependent whole-line sort can disagree with the
# order of the first field alone and make join miss pairings.
blastn -db "$DB" -query "$QUERY" -strand plus -task blastn -num_threads 5 -outfmt '6 sseqid nident' | LC_ALL=C sort -u > "$TMP"

# $TMP -> $TMP.inter
if [[ -n "$SUBSET" ]]; then
  # Sort a private copy of the id list instead of only checking its order:
  # the caller's file may be unsorted or sorted under another locale.
  LC_ALL=C sort -u "$SUBSET" > "$TMP.subset"
  # join separates output fields with a space; restore the tab that the
  # later "cut -f 1" relies on.
  LC_ALL=C join -1 1 -2 1 "$TMP" "$TMP.subset" | tr ' ' '\t' > "$TMP.inter"
else
  mv "$TMP" "$TMP.inter"
fi

# Best hits first (numeric, descending nident); keep the ids only.
sort -k2nr "$TMP.inter" | cut -f 1 > "$TMP.sorted"
head -100 "$TMP.sorted"


# Removes $TMP (if still present) and every $TMP.* helper file.
rm -f -- "$TMP"*