Skip to content

Commit

Permalink
release candidate 20210310
Browse files Browse the repository at this point in the history
  • Loading branch information
Jiaxing Yue authored and yjx1217 committed Mar 10, 2021
1 parent 6631911 commit 3e0244e
Show file tree
Hide file tree
Showing 104 changed files with 18,164 additions and 2 deletions.
2 changes: 1 addition & 1 deletion LICENSE → LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2018 Jia-Xing Yue
Copyright (c) 2021 Jia-Xing Yue

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
Binary file added Manual.pdf
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Download the reads of every sample listed in $sra_run_list with fastq-dump
# and rename the two resulting files to <sample_id>.R1.fq.gz / <sample_id>.R2.fq.gz.
set -e -o pipefail

#######################################
# load environment variables for RecombineX
source ./../../env.sh

#######################################
# set project-specific variables
sra_run_list="sample2reads_map.txt" # A simple tab-separated file with two columns, in which the first column contains the sample name and the second column contains the corresponding SRR id. Lines starting with "#" will be ignored. Default = "sample2reads_map.txt".
#######################################





#######################################
# process the pipeline

# $sra_dir is not assigned in this script, so it has to come from env.sh.
# Stop early instead of trying to execute "/fastq-dump" when it is missing.
: "${sra_dir:?sra_dir is not set; check ./../../env.sh}"

# "|| [[ -n $line ]]" keeps the last record when the list has no trailing newline.
while read -r line || [[ -n $line ]]
do
    [[ $line == \#* ]] && continue
    [[ $line == "" ]] && continue
    IFS=$'\t' read -r sample_id srr_id <<<"$line"
    # Both columns are needed below; a missing one would otherwise surface
    # only as a confusing fastq-dump or mv failure.
    if [[ -z $sample_id || -z $srr_id ]]
    then
        echo "Error: malformed line in $sra_run_list (expected <sample_id><TAB><SRR_id>): $line" >&2
        exit 1
    fi
    echo "retrieve reads by the SRR_id: $srr_id for the sample $sample_id ..."
    # The defline templates are single-quoted on purpose: $sn, $rn and $ri are
    # fastq-dump placeholders and must not be expanded by the shell.
    # NOTE(review): "-skip-technical" is spelled with a single dash here; confirm
    # against the fastq-dump help whether "--skip-technical" was intended.
    "$sra_dir"/fastq-dump --defline-seq '@$sn[_$rn]/$ri' --defline-qual '+$sn[_$rn]/$ri' \
        --gzip --split-files -skip-technical --dumpbase --read-filter pass --clip "$srr_id"
    mv -- "${srr_id}_pass_1.fastq.gz" "$sample_id.R1.fq.gz"
    mv -- "${srr_id}_pass_2.fastq.gz" "$sample_id.R2.fq.gz"
done < "$sra_run_list"

############################
# report success
# "set -e" aborts the script on any failing command above, so reaching this
# point already means success; no explicit "$?" test is required.
echo ""
echo "RecombineX message: This bash script has been successfully processed! :)"
echo ""
echo ""
exit 0
############################
23 changes: 23 additions & 0 deletions Project_Template/00.Gamete_Reads/sample2reads_map.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
## data source: https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA309059
## https://www.ncbi.nlm.nih.gov/sra?linkname=bioproject_sra_all&from_uid=309059
# sample_id SRR_id
AND1702-8A SRR2984859
AND1702-8B SRR2984860
AND1702-8C SRR2984861
AND1702-8D SRR2984862
AND1702-9A SRR2984863
AND1702-9B SRR2984864
AND1702-9C SRR2984865
AND1702-9D SRR2984866
AND1702-10A SRR2984867
AND1702-10B SRR2984868
AND1702-10C SRR2984869
AND1702-10D SRR2984870
AND1702-11A SRR2984871
AND1702-11B SRR2984872
AND1702-11C SRR2984873
AND1702-11D SRR2984874
AND1702-12A SRR2984875
AND1702-12B SRR2984876
AND1702-12C SRR2984877
AND1702-12D SRR2984878
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Copy the S288C and SK1 genome FASTA and feature GFF files shipped under
# $RECOMBINEX_HOME/data into the current directory and derive a
# centromere-only GFF for each of them.
set -e -o pipefail

#######################################
# load environment variables for RecombineX
source ./../../env.sh

#######################################
# set project-specific variables

# none

#######################################
# process the pipeline

# $RECOMBINEX_HOME is not assigned in this script, so it has to come from env.sh.
# Stop early instead of silently resolving paths such as "/data/S288C.genome.fa".
: "${RECOMBINEX_HOME:?RECOMBINEX_HOME is not set; check ./../../env.sh}"

echo "retrieve sample parental genome data ..."
for i in S288C SK1
do
    # Quoted so that an installation path containing spaces still works.
    cp "$RECOMBINEX_HOME/data/$i.genome.fa" .
    cp "$RECOMBINEX_HOME/data/$i.all_feature.gff" .
    perl "$RECOMBINEX_HOME/scripts/filter_gff_by_feature.pl" -i "$i.all_feature.gff" -o "$i.centromere.gff" -f centromere -m keep
done

# echo "retrieve sample subtelomere GFF files ..."
# for i in S288C SK1
# do
# cp $RECOMBINEX_HOME/data/Saccharomyces_cerevisiae_subtelomere_gff3/$i.subtelomere.gff .
# done

echo ""
echo "removing intermediate files and directories ..."
for i in S288C SK1
do
    rm -- "$i.all_feature.gff"
done


############################
# report success
# "set -e" aborts the script on any failing command above, so reaching this
# point already means success; no explicit "$?" test is required.
echo ""
echo "RecombineX message: This bash script has been successfully processed! :)"
echo ""
echo ""
exit 0
############################
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
# Download the reads of every sample listed in $sra_run_list with fastq-dump
# and rename the two resulting files to <sample_id>.R1.fq.gz / <sample_id>.R2.fq.gz.
set -e -o pipefail

#######################################
# load environment variables for RecombineX
source ./../../env.sh

#######################################
# set project-specific variables
sra_run_list="sample2reads_map.txt" # A simple tab-separated file with two columns, in which the first column contains the sample name and the second column contains the corresponding SRR id. Lines starting with "#" will be ignored. Default = "sample2reads_map.txt".
#######################################



#######################################
# process the pipeline

# $sra_dir is not assigned in this script, so it has to come from env.sh.
# Stop early instead of trying to execute "/fastq-dump" when it is missing.
: "${sra_dir:?sra_dir is not set; check ./../../env.sh}"

# "|| [[ -n $line ]]" keeps the last record when the list has no trailing newline.
while read -r line || [[ -n $line ]]
do
    [[ $line == \#* ]] && continue
    [[ $line == "" ]] && continue
    IFS=$'\t' read -r sample_id srr_id <<<"$line"
    # Both columns are needed below; a missing one would otherwise surface
    # only as a confusing fastq-dump or mv failure.
    if [[ -z $sample_id || -z $srr_id ]]
    then
        echo "Error: malformed line in $sra_run_list (expected <sample_id><TAB><SRR_id>): $line" >&2
        exit 1
    fi
    echo "retrieve reads by the SRR_id: $srr_id for the sample $sample_id ..."
    # The defline templates are single-quoted on purpose: $sn, $rn and $ri are
    # fastq-dump placeholders and must not be expanded by the shell.
    # NOTE(review): "-skip-technical" is spelled with a single dash here; confirm
    # against the fastq-dump help whether "--skip-technical" was intended.
    "$sra_dir"/fastq-dump --defline-seq '@$sn[_$rn]/$ri' --defline-qual '+$sn[_$rn]/$ri' \
        --gzip --split-files -skip-technical --dumpbase --read-filter pass --clip "$srr_id"
    mv -- "${srr_id}_pass_1.fastq.gz" "$sample_id.R1.fq.gz"
    mv -- "${srr_id}_pass_2.fastq.gz" "$sample_id.R2.fq.gz"
done < "$sra_run_list"

############################
# report success
# "set -e" aborts the script on any failing command above, so reaching this
# point already means success; no explicit "$?" test is required.
echo ""
echo "RecombineX message: This bash script has been successfully processed! :)"
echo ""
echo ""
exit 0
############################
5 changes: 5 additions & 0 deletions Project_Template/00.Parent_Reads/sample2reads_map.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## data source: https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA300835
## https://www.ncbi.nlm.nih.gov/sra?linkname=bioproject_sra_all&from_uid=300835
# sample_id SRR_id
SK1 SRR2984786
S288C SRR2984785
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash
# Download a reference genome assembly, tidy its FASTA headers and keep only
# the sequences named in $chr_list. All user-tunable settings are below.
set -e -o pipefail

#######################################
# load environment variables for RecombineX
source ./../../env.sh

#######################################
# set project-specific variables
ref_genome_prefix="Chlamydomonas_reinhardtii" # The file name prefix of the reference genome. Default = "Chlamydomonas_reinhardtii".
ref_genome_download_URL="ftp://ftp.ensemblgenomes.org/pub/plants/release-49/fasta/chlamydomonas_reinhardtii/dna/Chlamydomonas_reinhardtii.Chlamydomonas_reinhardtii_v5.5.dna_sm.toplevel.fa.gz" # The URL for downloading the reference genome. Default = "ftp://ftp.ensemblgenomes.org/pub/plants/release-49/fasta/chlamydomonas_reinhardtii/dna/Chlamydomonas_reinhardtii.Chlamydomonas_reinhardtii_v5.5.dna_sm.toplevel.fa.gz".
chr_list="./../../data/Chlamydomonas_reinhardtii.chr_list.txt" # The single-column list defining chromosomes/scaffolds/contigs to be included. Default = "./../../data/Chlamydomonas_reinhardtii.chr_list.txt".
debug="no" # Whether to keep intermediate files for debugging. Use "yes" to keep intermediate files, otherwise use "no". Default = "no".
#######################################




#######################################
# process the pipeline

#######################################
# Download a reference genome FASTA and leave it uncompressed on disk.
# Globals:   ref_genome_prefix (read) - file name prefix of the downloaded file
# Arguments: $1 - URL of the FASTA file; a URL ending in ".gz" is treated as
#                 gzip-compressed and is decompressed after the download
# Outputs:   a progress line on stdout; the file $ref_genome_prefix.raw.fa
#######################################
download_and_extract() {
    local url=${1:?download_and_extract: missing URL argument}
    local download_location

    echo "Downloading $url"
    if [[ $url =~ \.gz$ ]]
    #if [[ $url =~ \.fa.gz$ || $url =~ \.fasta.gz$ ]];
    then
        download_location="$ref_genome_prefix.raw.fa.gz"
        # NOTE(review): --no-check-certificate turns off TLS certificate
        # verification for https URLs; kept as in the original, confirm it is wanted.
        wget -c --no-check-certificate "$url" -O "$download_location"
        # -f: replace a stale uncompressed file left by an earlier run instead
        # of aborting because the output already exists.
        gunzip -f -- "$download_location"
    else
        download_location="$ref_genome_prefix.raw.fa"
        wget -c --no-check-certificate "$url" -O "$download_location"
    fi
}

# $RECOMBINEX_HOME is not assigned in this script, so it has to come from env.sh.
# Stop early instead of silently resolving paths such as "/scripts/tidy_fasta.pl".
: "${RECOMBINEX_HOME:?RECOMBINEX_HOME is not set; check ./../../env.sh}"

echo ""
echo "Retrieve the sample reference genome assembly ..."
download_and_extract "$ref_genome_download_URL"
echo ""
echo "Tidy the sample reference genome assembly ..."
# The helper scripts are run through perl so they do not depend on their
# executable bit.
perl "$RECOMBINEX_HOME/scripts/tidy_fasta.pl" -i "$ref_genome_prefix.raw.fa" -o "$ref_genome_prefix.tidy.fa"
# Prefix every sequence name with "chr". The pattern is anchored to the start
# of the line so that only the FASTA header marker is rewritten, not a ">"
# that happens to occur later in a header.
sed -i "s/^>/>chr/" "$ref_genome_prefix.tidy.fa"
# NOTE(review): the output name ends in ".gz"; whether select_fasta_by_list.pl
# actually compresses its output is not visible here - confirm.
perl "$RECOMBINEX_HOME/scripts/select_fasta_by_list.pl" -i "$ref_genome_prefix.tidy.fa" -l "$chr_list" -m normal -o "$ref_genome_prefix.tidy.lite.fa.gz"

if [[ $debug = "no" ]]
then
    echo ""
    echo "Removing intermediate files ..."
    rm -- "$ref_genome_prefix.raw.fa"
    rm -- "$ref_genome_prefix.tidy.fa"
fi

############################
# report success
# "set -e" aborts the script on any failing command above, so reaching this
# point already means success; no explicit "$?" test is required.
echo ""
echo "RecombineX message: This bash script has been successfully processed! :)"
echo ""
echo ""
exit 0
############################
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
# Download the SGD S288C reference release, build a tidied and gzip-compressed
# genome FASTA (SGDref.genome.fa.gz) and a centromere-only GFF
# (SGDref.centromere.gff) in the current directory.
set -e -o pipefail

#######################################
# load environment variables for RecombineX
source ./../../env.sh

#######################################
# set project-specific variables

debug="no" # Whether to keep intermediate files for debugging. Use "yes" to keep intermediate files, otherwise use "no". Default = "no".

#######################################



######################################
# process the pipeline

# $RECOMBINEX_HOME is not assigned in this script, so it has to come from env.sh.
# Stop early instead of silently resolving paths such as "/scripts/...".
: "${RECOMBINEX_HOME:?RECOMBINEX_HOME is not set; check ./../../env.sh}"

# Name shared by the downloaded archive and the directory it unpacks to.
sgd_release="S288C_reference_genome_R64-2-1_20150113"

echo "retrieve sample reference genome data ..."
wget -c "https://downloads.yeastgenome.org/sequence/S288C_reference/genome_releases/$sgd_release.tgz"
tar -xvzf "$sgd_release.tgz"
cp "./$sgd_release/S288C_reference_sequence_R64-2-1_20150113.fsa" SGDref.genome.raw.fa
cp "./$sgd_release/saccharomyces_cerevisiae_R64-2-1_20150113.gff" SGDref.all_feature.gff
perl "$RECOMBINEX_HOME/scripts/tidy_SGDref_genome.pl" -i SGDref.genome.raw.fa -o SGDref.genome.tidy.fa
perl "$RECOMBINEX_HOME/scripts/select_fasta_by_list.pl" -i SGDref.genome.tidy.fa -l "$RECOMBINEX_HOME/data/Saccharomyces_cerevisiae.chr_list.txt" -o SGDref.genome.fa -m normal
# -f: overwrite SGDref.genome.fa.gz from an earlier run; without it gzip
# refuses to replace the existing file and "set -e" aborts the rerun.
gzip -f SGDref.genome.fa
perl "$RECOMBINEX_HOME/scripts/filter_gff_by_feature.pl" -i SGDref.all_feature.gff -o SGDref.centromere.gff -f centromere -m keep

# echo "retrieve sample subtelomere GFF files ..."
# cp $RECOMBINEX_HOME/data/Saccharomyces_cerevisiae_subtelomere_gff3/SGDref.subtelomere.gff .

if [[ $debug = "no" ]]
then
    echo ""
    echo "removing intermediate files and directories ..."

    # The glob removes both the archive and the unpacked directory; ":?"
    # guards against the pattern collapsing to a bare "*".
    rm -rf -- "${sgd_release:?}"*
    rm -- SGDref.genome.raw.fa
    rm -- SGDref.genome.tidy.fa
    rm -- SGDref.all_feature.gff
fi

############################
# report success
# "set -e" aborts the script on any failing command above, so reaching this
# point already means success; no explicit "$?" test is required.
echo ""
echo "RecombineX message: This bash script has been successfully processed! :)"
echo ""
echo ""
exit 0
############################
Loading

0 comments on commit 3e0244e

Please sign in to comment.