-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnode_blast.sh
executable file
·63 lines (49 loc) · 2.09 KB
/
node_blast.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash
#
#SBATCH --job-name=blast
#SBATCH --output=job_out/blast_%j.out # Standard output
#SBATCH --error=job_out/blast_%j.err # Standard error
#SBATCH --time=20:00:00
#SBATCH --cpus-per-task=1
#SBATCH --mem=20G
#SBATCH --account pmg
# Load configuration
source config/config_blast.cfg
source /burg/pmg/users/korem_lab/miniforge3/etc/profile.d/conda.sh
conda activate blast
# Check if refGenome argument is provided
if [ -z "$1" ]; then
echo "Error: No reference genome provided. Usage: ./blast_script.sh <refGenome.fasta>"
exit 1
fi
# Reference genome (passed as argument)
refGenome=$1
refGenomeName=$(basename "${refGenome}" .fasta)
# Define the nodes fasta file (query file)
nodes_fasta="${copangraphDir}/${graphID}.fasta"
# Define the output directory for BLAST results
blast_out_dir="${blastOutDir}"
# Construct the full path for the current BLAST database
blast_db="${dbDir}/${refGenomeName}_blast_db"
# Define the output file for this BLAST run
blast_out="${blast_out_dir}/${graphID}_blast_results_${refGenomeName}_coverage${coverage}_percid${percid}_seed${wordSize}.out"
# Check if BLAST DB exists; if not, create it
if [ ! -f "${blast_db}.nhr" ]; then
echo "Creating BLAST nucleotide database for ${refGenomeName}..."
makeblastdb -in "${refGenome}" -dbtype nucl -out "${blast_db}" -title "${refGenomeName}_Reference_Genome_DB"
if [ $? -ne 0 ]; then
echo "Error: Failed to create BLAST database for ${refGenomeName}"
exit 1
fi
else
echo "BLAST nucleotide database for ${refGenomeName} already exists. Skipping creation."
fi
# Run BLAST of nodes against the current reference genome database
if [ ! -f "${blast_out}" ]; then
echo "Running BLAST search of nodes against ${refGenomeName} database..."
blastn -query "${nodes_fasta}" -db "${blast_db}" -out "${blast_out}" \
-perc_identity "${percidThreshold}" -word_size "${wordSize}" -qcov_hsp_perc "${coverage}" -outfmt 6
echo "BLAST search complete for ${refGenomeName}. Results saved to ${blast_out}"
else
echo "BLAST output for ${refGenomeName} already exists. Skipping BLAST search."
fi