-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
104 changed files
with
18,164 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
42 changes: 42 additions & 0 deletions
42
Project_Template/00.Gamete_Reads/RecombineX.00.Retrieve_SRA_Reads.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/bin/bash | ||
set -e -o pipefail | ||
|
||
####################################### | ||
# load environment variables for RecombineX | ||
source ./../../env.sh | ||
|
||
####################################### | ||
# set project-specific variables | ||
sra_run_list="sample2reads_map.txt" # A simple tab-separated file with two columns, in which the first column contains the sample name and the second column contains the corresponding SRR id. Lines starting with "#" will be ignored. Default = "sample2reads_map.txt". | ||
####################################### | ||
|
||
|
||
|
||
|
||
|
||
####################################### | ||
# process the pipeline | ||
|
||
#######################################
# Download the reads of every sample listed in a run list and rename the
# two mate files to <sample_id>.R1.fq.gz and <sample_id>.R2.fq.gz.
# Globals:
#   sra_dir (read) - directory that contains the fastq-dump executable
# Arguments:
#   $1 - tab-separated run list (sample_id<TAB>SRR_id); empty lines and
#        lines starting with "#" are skipped
# Outputs:
#   progress messages on stdout, diagnostics on stderr
# Returns:
#   0 on success, non-zero on a malformed record or a failed command
#######################################
retrieve_sra_reads() {
  local run_list=$1
  local line sample_id srr_id

  # "|| [[ -n $line ]]" keeps a last record that has no trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    [[ $line == \#* ]] && continue
    [[ -z $line ]] && continue
    IFS=$'\t' read -r sample_id srr_id <<<"$line"
    if [[ -z $sample_id || -z $srr_id ]]; then
      echo "RecombineX error: malformed line in $run_list: '$line'" >&2
      return 1
    fi
    echo "retrieve reads by the SRR_id: $srr_id for the sample $sample_id ..."
    # The defline templates are single-quoted on purpose: they are passed
    # literally to fastq-dump rather than being expanded by the shell.
    # NOTE(review): "-skip-technical" has a single leading dash while the
    # other long options use two - confirm against the fastq-dump usage.
    "$sra_dir"/fastq-dump --defline-seq '@$sn[_$rn]/$ri' --defline-qual '+$sn[_$rn]/$ri' \
      --gzip --split-files -skip-technical --dumpbase --read-filter pass --clip "$srr_id" \
      || return
    mv -- "${srr_id}_pass_1.fastq.gz" "${sample_id}.R1.fq.gz" || return
    mv -- "${srr_id}_pass_2.fastq.gz" "${sample_id}.R2.fq.gz" || return
  done < "$run_list"
}

# Called as a plain statement (not as an "if" condition) so that "set -e"
# remains effective inside the function.
retrieve_sra_reads "$sra_run_list"

############################
# checking bash exit status
if [[ $? -eq 0 ]]
then
  echo ""
  echo "RecombineX message: This bash script has been successfully processed! :)"
  echo ""
  echo ""
  exit 0
fi
############################ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
## data source: https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA309059 | ||
## https://www.ncbi.nlm.nih.gov/sra?linkname=bioproject_sra_all&from_uid=309059 | ||
# sample_id SRR_id | ||
AND1702-8A SRR2984859 | ||
AND1702-8B SRR2984860 | ||
AND1702-8C SRR2984861 | ||
AND1702-8D SRR2984862 | ||
AND1702-9A SRR2984863 | ||
AND1702-9B SRR2984864 | ||
AND1702-9C SRR2984865 | ||
AND1702-9D SRR2984866 | ||
AND1702-10A SRR2984867 | ||
AND1702-10B SRR2984868 | ||
AND1702-10C SRR2984869 | ||
AND1702-10D SRR2984870 | ||
AND1702-11A SRR2984871 | ||
AND1702-11B SRR2984872 | ||
AND1702-11C SRR2984873 | ||
AND1702-11D SRR2984874 | ||
AND1702-12A SRR2984875 | ||
AND1702-12B SRR2984876 | ||
AND1702-12C SRR2984877 | ||
AND1702-12D SRR2984878 |
48 changes: 48 additions & 0 deletions
48
Project_Template/00.Parent_Genomes/RecombineX.00.Prepare_Sample_Parent_Genomes.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash | ||
set -e -o pipefail | ||
|
||
####################################### | ||
# load environment variables for RecombineX | ||
source ./../../env.sh | ||
|
||
####################################### | ||
# set project-specific variables | ||
|
||
# none | ||
|
||
####################################### | ||
# process the pipeline | ||
|
||
# Stage the bundled parental genome data in the current directory.
# RECOMBINEX_HOME is expected to be set by the env.sh sourced earlier.
# All expansions are quoted so an install path containing spaces or glob
# characters does not break the copy/filter/remove steps.
parent_strains=(S288C SK1)

echo "retrieve sample parental genome data ..."
for strain in "${parent_strains[@]}"
do
  cp -- "$RECOMBINEX_HOME/data/$strain.genome.fa" .
  cp -- "$RECOMBINEX_HOME/data/$strain.all_feature.gff" .
  # Write <strain>.centromere.gff from the full feature GFF
  # (filter invoked with "-f centromere -m keep").
  perl "$RECOMBINEX_HOME/scripts/filter_gff_by_feature.pl" -i "$strain.all_feature.gff" -o "$strain.centromere.gff" -f centromere -m keep
done

# echo "retrieve sample subtelomere GFF files ..."
# for i in S288C SK1
# do
#     cp $RECOMBINEX_HOME/data/Saccharomyces_cerevisiae_subtelomere_gff3/$i.subtelomere.gff .
# done

echo ""
echo "removing intermediate files and directories ..."
# The full feature GFF is only needed as input for the filter step above.
for strain in "${parent_strains[@]}"
do
  rm -- "$strain.all_feature.gff"
done

############################
# checking bash exit status
if [[ $? -eq 0 ]]
then
  echo ""
  echo "RecombineX message: This bash script has been successfully processed! :)"
  echo ""
  echo ""
  exit 0
fi
############################ |
40 changes: 40 additions & 0 deletions
40
Project_Template/00.Parent_Reads/RecombineX.00.Retrieve_SRA_Reads.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash | ||
set -e -o pipefail | ||
|
||
####################################### | ||
# load environment variables for RecombineX | ||
source ./../../env.sh | ||
|
||
####################################### | ||
# set project-specific variables | ||
sra_run_list="sample2reads_map.txt" # A simple tab-separated file with two columns, in which the first column contains the sample name and the second column contains the corresponding SRR id. Lines starting with "#" will be ignored. Default = "sample2reads_map.txt". | ||
####################################### | ||
|
||
|
||
|
||
####################################### | ||
# process the pipeline | ||
|
||
#######################################
# Download the reads of every sample listed in a run list and rename the
# two mate files to <sample_id>.R1.fq.gz and <sample_id>.R2.fq.gz.
# Globals:
#   sra_dir (read) - directory that contains the fastq-dump executable
# Arguments:
#   $1 - tab-separated run list (sample_id<TAB>SRR_id); empty lines and
#        lines starting with "#" are skipped
# Outputs:
#   progress messages on stdout, diagnostics on stderr
# Returns:
#   0 on success, non-zero on a malformed record or a failed command
#######################################
retrieve_sra_reads() {
  local run_list=$1
  local line sample_id srr_id

  # "|| [[ -n $line ]]" keeps a last record that has no trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    [[ $line == \#* ]] && continue
    [[ -z $line ]] && continue
    IFS=$'\t' read -r sample_id srr_id <<<"$line"
    if [[ -z $sample_id || -z $srr_id ]]; then
      echo "RecombineX error: malformed line in $run_list: '$line'" >&2
      return 1
    fi
    echo "retrieve reads by the SRR_id: $srr_id for the sample $sample_id ..."
    # The defline templates are single-quoted on purpose: they are passed
    # literally to fastq-dump rather than being expanded by the shell.
    # NOTE(review): "-skip-technical" has a single leading dash while the
    # other long options use two - confirm against the fastq-dump usage.
    "$sra_dir"/fastq-dump --defline-seq '@$sn[_$rn]/$ri' --defline-qual '+$sn[_$rn]/$ri' \
      --gzip --split-files -skip-technical --dumpbase --read-filter pass --clip "$srr_id" \
      || return
    mv -- "${srr_id}_pass_1.fastq.gz" "${sample_id}.R1.fq.gz" || return
    mv -- "${srr_id}_pass_2.fastq.gz" "${sample_id}.R2.fq.gz" || return
  done < "$run_list"
}

# Called as a plain statement (not as an "if" condition) so that "set -e"
# remains effective inside the function.
retrieve_sra_reads "$sra_run_list"

############################
# checking bash exit status
if [[ $? -eq 0 ]]
then
  echo ""
  echo "RecombineX message: This bash script has been successfully processed! :)"
  echo ""
  echo ""
  exit 0
fi
############################ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
## data source: https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA300835 | ||
## https://www.ncbi.nlm.nih.gov/sra?linkname=bioproject_sra_all&from_uid=300835 | ||
# sample_id SRR_id | ||
SK1 SRR2984786 | ||
S288C SRR2984785 |
65 changes: 65 additions & 0 deletions
65
....Reference_Genome/RecombineX.00.Prepare_Reference_Genome_for_Chlamydomonas_reinhardtii.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/bin/bash | ||
set -e -o pipefail | ||
|
||
####################################### | ||
# load environment variables for RecombineX | ||
source ./../../env.sh | ||
|
||
####################################### | ||
# set project-specific variables | ||
ref_genome_prefix="Chlamydomonas_reinhardtii" # The file name prefix of the reference genome. Default = "Chlamydomonas_reinhardtii". | ||
ref_genome_download_URL="ftp://ftp.ensemblgenomes.org/pub/plants/release-49/fasta/chlamydomonas_reinhardtii/dna/Chlamydomonas_reinhardtii.Chlamydomonas_reinhardtii_v5.5.dna_sm.toplevel.fa.gz" # The URL for downloading the reference genome. Default = "ftp://ftp.ensemblgenomes.org/pub/plants/release-49/fasta/chlamydomonas_reinhardtii/dna/Chlamydomonas_reinhardtii.Chlamydomonas_reinhardtii_v5.5.dna_sm.toplevel.fa.gz". | ||
chr_list="./../../data/Chlamydomonas_reinhardtii.chr_list.txt" # The single-column list defining chromosomes/scaffolds/contigs to be included. Default = "./../../data/Chlamydomonas_reinhardtii.chr_list.txt". | ||
debug="no" # Whether to keep intermediate files for debugging. Use "yes" to keep intermediate files, otherwise use "no". Default = "no". | ||
####################################### | ||
|
||
|
||
|
||
|
||
####################################### | ||
# process the pipeline | ||
|
||
#######################################
# Download the reference genome into the current directory and leave it
# uncompressed as "$ref_genome_prefix.raw.fa".
# Globals:
#   ref_genome_prefix (read) - file name prefix of the downloaded file
# Arguments:
#   $1 - download URL; when it ends in ".gz" the file is gunzipped after
#        the download
# Outputs:
#   progress message on stdout, diagnostics on stderr
# Returns:
#   0 on success, non-zero if the URL is missing or wget/gunzip fails
#######################################
download_and_extract() {
  local url=${1-}
  local download_location

  if [[ -z $url ]]; then
    echo "download_and_extract: missing download URL" >&2
    return 1
  fi

  echo "Downloading $url"
  if [[ $url == *.gz ]]; then
    download_location="$ref_genome_prefix.raw.fa.gz"
    wget -c --no-check-certificate "$url" -O "$download_location" || return
    # -f: overwrite a stale .raw.fa left behind by an earlier run instead
    # of failing when the script is re-executed.
    gunzip -f -- "$download_location" || return
  else
    download_location="$ref_genome_prefix.raw.fa"
    wget -c --no-check-certificate "$url" -O "$download_location" || return
  fi
}
|
||
# Download, tidy and subset the reference genome. Every expansion is
# quoted so that prefixes/paths containing spaces or glob characters are
# passed through as single arguments.
echo ""
echo "Retrieve the sample reference genome assembly ..."
download_and_extract "$ref_genome_download_URL"
echo ""
echo "Tidy the sample reference genome assembly ..."
raw_fasta="$ref_genome_prefix.raw.fa"
tidy_fasta="$ref_genome_prefix.tidy.fa"
"$RECOMBINEX_HOME/scripts/tidy_fasta.pl" -i "$raw_fasta" -o "$tidy_fasta"
# Insert "chr" after every ">" so the sequence names carry a "chr" prefix
# (presumably to match the names listed in $chr_list - confirm).
# NOTE: "sed -i" without a backup suffix is the GNU sed form.
sed -i "s/>/>chr/gi" "$tidy_fasta"
"$RECOMBINEX_HOME/scripts/select_fasta_by_list.pl" -i "$tidy_fasta" -l "$chr_list" -m normal -o "$ref_genome_prefix.tidy.lite.fa.gz"

if [[ $debug = "no" ]]
then
  echo ""
  echo "Removing intermediate files ..."
  rm -- "$raw_fasta"
  rm -- "$tidy_fasta"
fi

############################
# checking bash exit status
if [[ $? -eq 0 ]]
then
  echo ""
  echo "RecombineX message: This bash script has been successfully processed! :)"
  echo ""
  echo ""
  exit 0
fi
############################ |
53 changes: 53 additions & 0 deletions
53
...0.Reference_Genome/RecombineX.00.Prepare_Reference_Genome_for_Saccharomyces_cerevisiae.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash | ||
set -e -o pipefail | ||
|
||
####################################### | ||
# load environment variables for RecombineX | ||
source ./../../env.sh | ||
|
||
####################################### | ||
# set project-specific variables | ||
|
||
debug="no" # Whether to keep intermediate files for debugging. Use "yes" to keep intermediate files, otherwise use "no". Default = "no". | ||
|
||
####################################### | ||
|
||
|
||
|
||
###################################### | ||
# process the pipeline | ||
# Name of the SGD release archive (without ".tgz"); it is also the name of
# the directory the archive unpacks into, as used by the cp steps below.
sgd_release="S288C_reference_genome_R64-2-1_20150113"

echo "retrieve sample reference genome data ..."
wget -c "https://downloads.yeastgenome.org/sequence/S288C_reference/genome_releases/$sgd_release.tgz"
tar -xvzf "$sgd_release.tgz"
cp "./$sgd_release/S288C_reference_sequence_R64-2-1_20150113.fsa" SGDref.genome.raw.fa
cp "./$sgd_release/saccharomyces_cerevisiae_R64-2-1_20150113.gff" SGDref.all_feature.gff
perl "$RECOMBINEX_HOME/scripts/tidy_SGDref_genome.pl" -i SGDref.genome.raw.fa -o SGDref.genome.tidy.fa
perl "$RECOMBINEX_HOME/scripts/select_fasta_by_list.pl" -i SGDref.genome.tidy.fa -l "$RECOMBINEX_HOME/data/Saccharomyces_cerevisiae.chr_list.txt" -o SGDref.genome.fa -m normal
# -f: replace an SGDref.genome.fa.gz left by an earlier run instead of
# aborting when the script is re-executed.
gzip -f SGDref.genome.fa
perl "$RECOMBINEX_HOME/scripts/filter_gff_by_feature.pl" -i SGDref.all_feature.gff -o SGDref.centromere.gff -f centromere -m keep

# echo "retrieve sample subtelomere GFF files ..."
# cp $RECOMBINEX_HOME/data/Saccharomyces_cerevisiae_subtelomere_gff3/SGDref.subtelomere.gff .

if [[ $debug = "no" ]]
then
  echo ""
  echo "removing intermediate files and directories ..."

  # The glob removes both the unpacked directory and the downloaded .tgz.
  rm -rf -- "$sgd_release"*
  rm -- SGDref.genome.raw.fa
  rm -- SGDref.genome.tidy.fa
  rm -- SGDref.all_feature.gff
fi

############################
# checking bash exit status
if [[ $? -eq 0 ]]
then
  echo ""
  echo "RecombineX message: This bash script has been successfully processed! :)"
  echo ""
  echo ""
  exit 0
fi
############################ |
Oops, something went wrong.