Skip to content

Commit

Permalink
Merge branch 'fdog_goes_assembly' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
HannahBioI authored Mar 10, 2022
2 parents 9146028 + 14c852c commit 3bb9075
Show file tree
Hide file tree
Showing 12 changed files with 730 additions and 552 deletions.
Binary file modified .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
[![Build Status](https://travis-ci.com/BIONF/fDOG.svg?branch=master)](https://travis-ci.com/BIONF/fDOG)
![Github Build](https://github.com/BIONF/fDOG/workflows/build/badge.svg)

# Poster fDOG - Assembly
https://github.com/BIONF/fDOG/blob/gh-pages/www/Poster_fDOG_Assembly.pdf
# Table of Contents
* [How to install](#how-to-install)
* [Install the fDOG package](#install-the-fdog-package)
Expand Down
Binary file modified fdog/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions fdog/bin/hamstr.pl
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@

######################## start main ###########################################
my $version = "HaMStR v.13.4.6";

######################## checking whether the configure script has been run ###
my $configure = 0;
if ($configure == 0){
Expand Down
74 changes: 21 additions & 53 deletions fdog/bin/oneSeq.pl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@

############ General settings
my $version = 'oneSeq v.2.4.1';

##### configure for checking if the setup.sh script already run
my $configure = 0;
if ($configure == 0){
Expand Down Expand Up @@ -208,7 +209,6 @@
my $idx_dir = "$path/taxonomy/";
my $dataDir = $path . '/data';
my $weightPath = "$path/weight_dir/";
my $assembly_dir = "$path/assembly_dir/";

my @defaultRanks = (
'superkingdom', 'kingdom',
Expand Down Expand Up @@ -313,15 +313,6 @@
my %hashTree;
my $aln = 'muscle';
my $searchTaxa;
#variables for fdog_goes_assembly
my $assembly;
my $augustusRefSpec;
my $avIntron;
my $lengthExtension;
my $assemblyPath;
my $searchTool = 'blast';
my $matrix = 'blosum62';
my $dataPath = '';
################# Command line options
GetOptions (
"h" => \$help,
Expand Down Expand Up @@ -384,15 +375,7 @@
"distDeviation=s" => \$distDeviation,
"aligner=s" => \$aln,
"hyperthread" => \$hyperthread,
"searchTaxa=s" => \$searchTaxa,
"assembly" => \$assembly,
"assemblypath=s" => \$assemblyPath,
"augustusRefSpec=s" => \$augustusRefSpec,
"avIntron=s" => \$avIntron,
"lengthExtension=s" => \$lengthExtension,
"searchTool=s" => \$searchTool,
"scoringmatrix=s" => \$matrix,
"dataPath=s" => \$dataPath
"searchTaxa=s" => \$searchTaxa
);

$outputPath = abs_path($outputPath);
Expand All @@ -404,8 +387,6 @@
$weightPath = abs_path($weightPath)."/";
$genome_dir = abs_path($genome_dir)."/";
$taxaPath = $genome_dir;
$dataPath = abs_path($dataPath)."/";
$assembly_dir = abs_path($assemblyPath)."/";

############# do initial check
if (!defined $help && !defined $getversion) { #} && !defined $showTaxa) {
Expand All @@ -415,7 +396,7 @@
initialCheck($seqFile, $seqName, $blastPath, $taxaPath, $weightPath, $fasoff);
}

if (!defined $coreex && !defined $assembly) {
if (!defined $coreex) {
if (!grep(/$minDist/, @defaultRanks)) {
die "ERROR: minDist $minDist invalid!\n";
}
Expand Down Expand Up @@ -499,7 +480,7 @@

# create weight_dir in oneseq's home dir (used for annotations,weighting,feature extraction)
# get annotations for seed sequence if fas support is on
if ($fas_support && !$assembly){
if ($fas_support){
if (!$weightPath) {
createWeightFolder();
}
Expand All @@ -508,7 +489,7 @@

my $coreStTime = gettime(); #time;
#core-ortholog search
if (!$coreex && !$assembly) {
if (!$coreex) {
print "\nCore compiling...\n";
$coremode = 1;
$taxaPath = $blastPath;
Expand Down Expand Up @@ -646,12 +627,7 @@
my $final_eval_blast = $eval_blast*$eval_relaxfac;
my $final_eval_hmmer = $eval_hmmer*$eval_relaxfac;

if (!$assembly){
$taxaPath = $genome_dir;
}
else{
$taxaPath = $assembly_dir;
}
$taxaPath = $genome_dir;
my @searchTaxa;
unless ($searchTaxa) {
unless($groupNode) {
Expand Down Expand Up @@ -741,11 +717,15 @@
if (-e $finalOutput) {
addSeedSeq($seqId, $seqName, $coreOrthologsPath, $refSpec, $finalOutput);
}
### remove duplicated seq in extended.fa
if (-e $finalOutput) {
addSeedSeq($seqId, $seqName, $coreOrthologsPath, $refSpec, $finalOutput);
}
push @logOUT, "Ortholog search completed in ". roundtime(gettime() - $orthoStTime) ." sec!";
print "==> Ortholog search completed in ". roundtime(gettime() - $orthoStTime) ." sec!\n";


if(!$coreOnly && !$assembly){
## Evaluation of all orthologs that are predicted by the final run
if(!$coreOnly){
my $fasStTime = gettime();
my $processID = $$;

Expand All @@ -757,7 +737,7 @@
addSeedSeq($seqId, $seqName, $coreOrthologsPath, $refSpec, $finalOutput);

# calculate FAS scores for final extended.fa
if ($fas_support && !$assembly) {
if ($fas_support) {
print "Starting the feature architecture similarity score computation...\n";
my $fdogFAScmd = "$fdogFAS_prog -i $finalOutput -w $weightPath -t $tmpdir -o $outputPath --cores $cpu --redo_anno";
unless ($countercheck) {
Expand All @@ -770,21 +750,12 @@
}
push @logOUT, "FAS calculation completed in " . roundtime(gettime() - $fasStTime). " sec!\n";
print "==> FAS calculation completed in " . roundtime(gettime() - $fasStTime). " sec!\n";

if($autoclean){
print "Cleaning up...\n";
runAutoCleanUp($processID);
}
}

if ($assembly){
my $file_assembly_out;
$file_assembly_out = $outputPath . '/' . $seqName;
my $cmd_merge;
$cmd_merge = "fdog.mergeAssembly --in $outputPath --out $file_assembly_out --cleanup";
printDebug($cmd_merge);
system($cmd_merge);
}
## Delete tmp folder
unless ($debug) {
my $delTmp = "rm -rf $tmpdir";
Expand Down Expand Up @@ -1194,10 +1165,10 @@ sub checkOptions {
if ($force == 1 and $append ==1) {
$force = 0;
}
### check the presence of the pre-computed core set if options reuseCore or assembly is used
if ($coreex || $assembly) {
### check the presence of the pre-computed core set
if ($coreex) {
if (! -e "$coreOrthologsPath/$seqName/$seqName.fa") {
print "You selected the option -reuseCore or -assembly, but the core ortholog group $coreOrthologsPath/$seqName/hmm_dir/$seqName.hmm does not exist\n";
print "You selected the option -reuseCore, but the core ortholog group $coreOrthologsPath/$seqName/hmm_dir/$seqName.hmm does not exist\n";
exit;
}
}
Expand Down Expand Up @@ -1268,7 +1239,7 @@ sub checkOptions {

### checking the number of core orthologs. Omit this check if the option -reuseCore has been selected
$optbreaker = 0;
while(!$minCoreOrthologs and (!$coreex and !$assembly)) {
while(!$minCoreOrthologs and !$coreex) {
if ($optbreaker >= 3){
print "No proper number given ... exiting.\n";
exit;
Expand All @@ -1283,12 +1254,10 @@ sub checkOptions {
$filter = 'no' if $filter eq 'F';
}

if (!$assembly){
$inputSeq = fetchSequence($seqFile, $dataDir);
}
$inputSeq = fetchSequence($seqFile, $dataDir);

## the user has not provided a sequence id, however, the refspec is determined.
if($seqId eq '' && !$assembly) {
if($seqId eq '') {
my $besthit;
if (!$blast){
## a refspec has been determined
Expand Down Expand Up @@ -1398,9 +1367,8 @@ sub checkOptions {
#### checking for the min and max distance for the core set compilation
#### omit this check, if the option reuseCore has been selected (added 2019-02-04)
$optbreaker = 0;
if (!$coreex and !$assembly) {
if (!$coreex) {
my $node;
#print "Testing coreex assembly\n";
$node = $db->get_taxon(-taxonid => $refTaxa{$refSpec});
$node->name('supplied', $refSpec);
if (lc($maxDist) eq "root"){
Expand Down Expand Up @@ -2673,7 +2641,7 @@ sub initialCheck {
}
}
# check weight_dir
if ($fasoff != 1 && !$assembly) {
if ($fasoff != 1) {
my %seen;
my @allTaxa = grep( !$seen{$_}++, @genomeDir, @blastDir);
my @notFolder;
Expand Down
Loading

0 comments on commit 3bb9075

Please sign in to comment.