diff --git a/.DS_Store b/.DS_Store
index fa2521e..bcbd073 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/README.md b/README.md
index 52f11e2..8db83ce 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@
 [![Build Status](https://travis-ci.com/BIONF/fDOG.svg?branch=master)](https://travis-ci.com/BIONF/fDOG)
 ![Github Build](https://github.com/BIONF/fDOG/workflows/build/badge.svg)
 
+# Poster fDOG - Assembly
+https://github.com/BIONF/fDOG/blob/gh-pages/www/Poster_fDOG_Assembly.pdf
 # Table of Contents
 * [How to install](#how-to-install)
      * [Install the fDOG package](#install-the-fdog-package)
diff --git a/fdog/.DS_Store b/fdog/.DS_Store
index f638c26..a99a01c 100644
Binary files a/fdog/.DS_Store and b/fdog/.DS_Store differ
diff --git a/fdog/bin/hamstr.pl b/fdog/bin/hamstr.pl
index b762854..09ec8ec 100755
--- a/fdog/bin/hamstr.pl
+++ b/fdog/bin/hamstr.pl
@@ -200,6 +200,7 @@
 
 ######################## start main ###########################################
 my $version = "HaMStR v.13.4.6";
+
 ######################## checking whether the configure script has been run ###
 my $configure = 0;
 if ($configure == 0){
diff --git a/fdog/bin/oneSeq.pl b/fdog/bin/oneSeq.pl
index 0352c0d..2fe8333 100755
--- a/fdog/bin/oneSeq.pl
+++ b/fdog/bin/oneSeq.pl
@@ -135,6 +135,7 @@
 
 ############ General settings
 my $version = 'oneSeq v.2.4.1';
+
 ##### configure for checking if the setup.sh script already run
 my $configure = 0;
 if ($configure == 0){
@@ -208,7 +209,6 @@
 my $idx_dir = "$path/taxonomy/";
 my $dataDir = $path . '/data';
 my $weightPath = "$path/weight_dir/";
-my $assembly_dir = "$path/assembly_dir/";
 
 my @defaultRanks = (
 	'superkingdom', 'kingdom',
@@ -313,15 +313,6 @@
 my %hashTree;
 my $aln = 'muscle';
 my $searchTaxa;
-#variables for fdog_goes_assembly
-my $assembly;
-my $augustusRefSpec;
-my $avIntron;
-my $lengthExtension;
-my $assemblyPath;
-my $searchTool = 'blast';
-my $matrix = 'blosum62';
-my $dataPath = '';
 ################# Command line options
 GetOptions (
 	"h"                 => \$help,
@@ -384,15 +375,7 @@
 	"distDeviation=s"	=> \$distDeviation,
 	"aligner=s"	=> \$aln,
 	"hyperthread" => \$hyperthread,
-	"searchTaxa=s" => \$searchTaxa,
-	"assembly" => \$assembly,
-	"assemblypath=s" => \$assemblyPath,
-	"augustusRefSpec=s" => \$augustusRefSpec,
-	"avIntron=s" => \$avIntron,
-	"lengthExtension=s" => \$lengthExtension,
-	"searchTool=s" => \$searchTool,
-	"scoringmatrix=s" => \$matrix,
-	"dataPath=s" => \$dataPath
+	"searchTaxa=s" => \$searchTaxa
 );
 
 $outputPath = abs_path($outputPath);
@@ -404,8 +387,6 @@
 $weightPath = abs_path($weightPath)."/";
 $genome_dir = abs_path($genome_dir)."/";
 $taxaPath = $genome_dir;
-$dataPath = abs_path($dataPath)."/";
-$assembly_dir = abs_path($assemblyPath)."/";
 
 ############# do initial check
 if (!defined $help && !defined $getversion) { #} && !defined $showTaxa) {
@@ -415,7 +396,7 @@
 		initialCheck($seqFile, $seqName, $blastPath, $taxaPath, $weightPath, $fasoff);
 	}
 
-	if (!defined $coreex && !defined $assembly) {
+	if (!defined $coreex) {
 		if (!grep(/$minDist/, @defaultRanks)) {
 			die "ERROR: minDist $minDist invalid!\n";
 		}
@@ -499,7 +480,7 @@
 
 # create weight_dir in oneseq's home dir (used for annotations,weighting,feature extraction)
 # get annotations for seed sequence if fas support is on
-if ($fas_support && !$assembly){
+if ($fas_support){
 	if (!$weightPath) {
 		createWeightFolder();
 	}
@@ -508,7 +489,7 @@
 
 my $coreStTime = gettime(); #time;
 #core-ortholog search
-if (!$coreex && !$assembly) {
+if (!$coreex) {
 	print "\nCore compiling...\n";
 	$coremode = 1;
 	$taxaPath = $blastPath;
@@ -646,12 +627,7 @@
 	my $final_eval_blast = $eval_blast*$eval_relaxfac;
 	my $final_eval_hmmer = $eval_hmmer*$eval_relaxfac;
 
-	if (!$assembly){
-		$taxaPath = $genome_dir;
-	}
-	else{
-		$taxaPath = $assembly_dir;
-	}
+	$taxaPath = $genome_dir;
 	my @searchTaxa;
 	unless ($searchTaxa) {
 		unless($groupNode) {
@@ -741,11 +717,15 @@
 if (-e $finalOutput) {
 	addSeedSeq($seqId, $seqName, $coreOrthologsPath, $refSpec, $finalOutput);
 }
+### remove duplicated seq in extended.fa
+if (-e $finalOutput) {
+	addSeedSeq($seqId, $seqName, $coreOrthologsPath, $refSpec, $finalOutput);
+}
 push @logOUT, "Ortholog search completed in ". roundtime(gettime() - $orthoStTime) ." sec!";
 print "==> Ortholog search completed in ". roundtime(gettime() - $orthoStTime) ." sec!\n";
 
-
-if(!$coreOnly && !$assembly){
+## Evaluation of all orthologs that are predicted by the final run
+if(!$coreOnly){
 	my $fasStTime = gettime();
 	my $processID = $$;
 
@@ -757,7 +737,7 @@
 	addSeedSeq($seqId, $seqName, $coreOrthologsPath, $refSpec, $finalOutput);
 
 	# calculate FAS scores for final extended.fa
-	if ($fas_support && !$assembly) {
+	if ($fas_support) {
 		print "Starting the feature architecture similarity score computation...\n";
 		my $fdogFAScmd = "$fdogFAS_prog -i $finalOutput -w $weightPath -t $tmpdir -o $outputPath --cores $cpu --redo_anno";
 		unless ($countercheck) {
@@ -770,21 +750,12 @@
 	}
 	push @logOUT, "FAS calculation completed in " . roundtime(gettime() - $fasStTime). " sec!\n";
 	print "==> FAS calculation completed in " . roundtime(gettime() - $fasStTime). " sec!\n";
-
 	if($autoclean){
 		print "Cleaning up...\n";
 		runAutoCleanUp($processID);
 	}
 }
 
-if ($assembly){
-	my $file_assembly_out;
-	$file_assembly_out = $outputPath . '/' . $seqName;
-	my $cmd_merge;
-	$cmd_merge = "fdog.mergeAssembly --in  $outputPath --out  $file_assembly_out --cleanup";
-	printDebug($cmd_merge);
-	system($cmd_merge);
-}
 ## Delete tmp folder
 unless ($debug) {
 	my $delTmp = "rm -rf $tmpdir";
@@ -1194,10 +1165,10 @@ sub checkOptions {
 	if ($force == 1 and $append ==1) {
 		$force = 0;
 	}
-	### check the presence of the pre-computed core set if options reuseCore or assembly is used
-	if ($coreex || $assembly) {
+	### check the presence of the pre-computed core set
+	if ($coreex) {
 		if (! -e "$coreOrthologsPath/$seqName/$seqName.fa") {
-			print "You selected the option -reuseCore or -assembly, but the core ortholog group $coreOrthologsPath/$seqName/hmm_dir/$seqName.hmm does not exist\n";
+			print "You selected the option -reuseCore, but the core ortholog group $coreOrthologsPath/$seqName/hmm_dir/$seqName.hmm does not exist\n";
 			exit;
 		}
 	}
@@ -1268,7 +1239,7 @@ sub checkOptions {
 
 	### checking the number of core orthologs. Omit this check if the option -reuseCore has been selected
 	$optbreaker = 0;
-	while(!$minCoreOrthologs and (!$coreex and !$assembly)) {
+	while(!$minCoreOrthologs and !$coreex) {
 		if ($optbreaker >= 3){
 			print "No proper number given ... exiting.\n";
 			exit;
@@ -1283,12 +1254,10 @@ sub checkOptions {
 		$filter = 'no' if $filter eq 'F';
 	}
 
-	if (!$assembly){
-		$inputSeq = fetchSequence($seqFile, $dataDir);
-	}
+	$inputSeq = fetchSequence($seqFile, $dataDir);
 
 	## the user has not provided a sequence id, however, the refspec is determined.
-	if($seqId eq '' && !$assembly) {
+	if($seqId eq '') {
 		my $besthit;
 		if (!$blast){
 			## a refspec has been determined
@@ -1398,9 +1367,8 @@ sub checkOptions {
 	#### checking for the min and max distance for the core set compilation
 	#### omit this check, if the option reuseCore has been selected (added 2019-02-04)
 	$optbreaker = 0;
-	if (!$coreex and !$assembly) {
+	if (!$coreex) {
 		my $node;
-		#print "Testing coreex assembly\n";
 		$node = $db->get_taxon(-taxonid => $refTaxa{$refSpec});
 		$node->name('supplied', $refSpec);
 		if (lc($maxDist) eq "root"){
@@ -2673,7 +2641,7 @@ sub initialCheck {
 		}
 	}
 	# check weight_dir
-	if ($fasoff != 1 && !$assembly) {
+	if ($fasoff != 1) {
 		my %seen;
 		my @allTaxa = grep( !$seen{$_}++, @genomeDir, @blastDir);
 		my @notFolder;
diff --git a/fdog/fDOGassembly.py b/fdog/fDOGassembly.py
index b802b26..7027236 100644
--- a/fdog/fDOGassembly.py
+++ b/fdog/fDOGassembly.py
@@ -1,3 +1,21 @@
+# -*- coding: utf-8 -*-
+
+#######################################################################
+# Copyright (C) 2021 Hannah Muelbaier
+#
+#  This script is used to run fDOG-Assembly which performs targeted ortholog
+#  searches on genome assemblies
+#
+#  This script is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License <http://www.gnu.org/licenses/> for
+#  more details
+#
+#  Contact: hannah.muelbaier@gmail.com
+#
+#######################################################################
+
 ############################ imports ###########################################
 import os
 import os.path
@@ -8,7 +26,31 @@
 import argparse
 import yaml
 import subprocess
+import time
+import shutil
+import multiprocessing as mp
+
 ########################### functions ##########################################
+def check_path(path):
+    if not os.path.exists(path):
+        print(path + " does not exist. Exciting ...")
+        sys.exit()
+
+def check_ref_sepc(species_list, fasta_file):
+    file = open(fasta_file, "r")
+    lines = file.readlines()
+    species_file = []
+
+    for line in lines:
+        if line[0] == ">":
+            species = line.split("|")[1]
+            species_file.append(species)
+    for species in species_list:
+        if species in species_file:
+            return species
+    print("Reference species is not part of the ortholog group. Exciting ...")
+    sys.exit()
+
 def load_config(config_file):
     with open(config_file, 'r') as stream:
         try:
@@ -16,82 +58,129 @@ def load_config(config_file):
         except yaml.YAMLError as exc:
             print(exc)
 
+def starting_subprocess(cmd, mode, time_out = None):
+
+    try:
+        if mode == 'debug':
+            result = subprocess.run(cmd, shell=True, timeout = time_out)
+        elif mode == 'silent':
+            result = subprocess.run(cmd, stdout = subprocess.PIPE, stderr = subprocess.PIPE, shell=True, timeout = time_out)
+        elif mode == 'normal':
+            result = subprocess.run(cmd, stdout = subprocess.PIPE, shell=True, timeout = time_out)
+    except subprocess.TimeoutExpired:
+        return 1
+
 def merge(blast_results, insert_length):
+    #merging overlapping and contiguous candidate regions
+    #format dictionary: {node_name: [[<sstart>, <send>, <evalue>, <qstart>, <qend>, <strand>, <score>]]}
     number_regions = 0
+    insert_length = int(insert_length)
+    score_list = []
     for key in blast_results:
         locations = blast_results[key]
         locations = sorted(locations, key = lambda x: int(x[3]))
-        #print("test")
-        #print(locations)
         size_list = len(locations)
-
         j = 0
-
         while j < size_list-1:
-            i = 1
-            while i < size_list-1:
-
-                if ((locations[j][0] < locations[i][0]) and (locations[j][1] > locations[i][0]) and (locations[j][5] == locations[i][5])):
-                    #merge overlapping regions
+            i = j + 1
+            while i < size_list:
+                if ((locations[j][0] < locations[i][0]) and (locations[j][1] > locations[i][0]) and (locations[j][5] == locations[i][5]) and (locations[i][5] == '+')):
+                    #merge overlapping regions plus strand
                     locations[j][1] = max(locations[j][1], locations[i][1])
                     locations[j][2] = min(locations[j][2], locations[i][2])
+                    locations[j][4] = max(locations[j][4], locations[i][4])
+                    locations[j][6] = max(locations[j][6], locations[i][6])
+                    locations.pop(i)
+                    size_list -= 1
+                    i -= 1
+                elif ((locations[j][1] > locations[i][1]) and (locations[j][0] < locations[i][1]) and (locations[j][5] == locations[i][5]) and (locations[i][5] == '-')):
+                    #merge overlapping regions minus strand
+                    locations[j][0] = min(locations[j][0], locations[i][0])
+                    locations[j][2] = min(locations[j][2], locations[i][2])
+                    locations[j][4] = max(locations[j][4], locations[i][4])
+                    locations[j][6] = max(locations[j][6], locations[i][6])
                     locations.pop(i)
                     size_list -= 1
                     i -= 1
-                elif ((locations[j][0] < locations[i][0]) and (locations[i][0] - locations[j][1] <= 2* insert_length) and (locations[j][5] == locations[i][5])):
-                    #print(j)
+                elif ((locations[j][0] < locations[i][0]) and (locations[i][0] - locations[j][1] <= 2*insert_length) and (locations[j][5] == locations[i][5]) and (locations[i][5] == '+')):
+                    #merging consecutive regions whose distance is not longer than the cutoff (2*insert_length), plus strand
                     locations[j][1] = max(locations[j][1], locations[i][1])
                     locations[j][2] = min(locations[j][2], locations[i][2])
+                    locations[j][4] = max(locations[j][4], locations[i][4])
+                    locations[j][6] = max(locations[j][6], locations[i][6])
+                    locations.pop(i)
+                    size_list -= 1
+                    i -=1
+                elif ((locations[j][1] > locations[i][1]) and (locations[j][0] - locations[i][1] <= 2* insert_length) and (locations[j][5] == locations[i][5]) and (locations[i][5] == '-')):
+                    #merging consecutive regions whose distance is not longer than the cutoff (2*insert_length), minus strand
+                    locations[j][0] = min(locations[j][0], locations[i][0])
+                    locations[j][2] = min(locations[j][2], locations[i][2])
+                    locations[j][4] = max(locations[j][4], locations[i][4])
+                    locations[j][6] = max(locations[j][6], locations[i][6])
                     locations.pop(i)
                     size_list -= 1
                     i -=1
                 i += 1
             j += 1
 
+        for entry in locations:
+            score_list.append(entry[6])
         number_regions += len(locations)
         blast_results[key] = locations
 
-    #print(blast_results)
-    return blast_results, number_regions
+    return blast_results, number_regions, score_list
 
-def parse_blast(line, blast_results):
-    # format blast line:  <contig> <sstart> <send> <evalue> <qstart> <qend> <strand>
-    #fomrat dictionary: {node_name: [(<start>,<end>)]}
-    #print(line)
+def parse_blast(line, blast_results, cutoff):
+    # format blast line:  <contig> <sstart> <send> <evalue> <qstart> <qend> <score>
+    # format dictionary: {node_name: [[<sstart>, <send>, <evalue>, <qstart>, <qend>, <strand>, <score>]]}
     line = line.replace("\n", "")
     line_info = line.split("\t")
-    #print(line_info)
     evalue = float(line_info[3])
-
     #cut off
-    if evalue > 0.00001:
+    if evalue > cutoff:
         return blast_results, evalue
     #add region to dictionary
     else:
-        node_name, sstart, send, qstart, qend = line_info[0], line_info[1], line_info[2], line_info[4], line_info[5]
+        node_name, sstart, send, qstart, qend, score = line_info[0], int(line_info[1]), int(line_info[2]), int(line_info[4]), int(line_info[5]), int(line_info[6])
         split = node_name.split("|")
-
-        # finding out on which strand tBLASTn founded a hit
+        # finding out on which strand tBLASTn found a hit
         if sstart < send:
             strand = "+"
         else:
-            sstart = line_info[2]
-            send = line_info[1]
+            sstart = int(line_info[2])
+            send = int(line_info[1])
             strand = "-"
-
-        #creating a dictionary that inlcudes every tBLASTn that is better as the evalue cut-off of 0.00001
+        #creating a dictionary that includes every tBLASTn hit that is better than the evalue cut-off
         if len(split) > 1:
             node_name = split[1]
         if node_name in blast_results:
             list = blast_results[node_name]
-            list.append([int(sstart),int(send), evalue, int(qstart), int(qend), strand])
+            list.append([int(sstart),int(send), evalue, int(qstart), int(qend), strand, score])
             blast_results[node_name] = list
         else:
-            blast_results[node_name] = [[int(sstart),int(send), evalue, int(qstart), int(qend), strand]]
+            blast_results[node_name] = [[int(sstart),int(send), evalue, int(qstart), int(qend), strand, score]]
 
     return blast_results, evalue
 
-def candidate_regions(intron_length, evalue, tmp_path):
+def get_x_results(blast_dic, x, score_list):
+
+    new_dic = {}
+    score_list.sort(reverse=True)
+    min = score_list[x - 1]
+    number_regions = 0
+
+    for key in blast_dic:
+        key_list = []
+        entries = blast_dic[key]
+        for i in entries:
+            if i[6] >= min:
+                key_list.append(i)
+        if key_list != []:
+            new_dic[key] = key_list
+            number_regions += len(key_list)
+    return new_dic, number_regions
+
+def candidate_regions(intron_length, cutoff_evalue, tmp_path, x = 10):
     ###################### extracting candidate regions ########################
     # info about output blast http://www.metagenomics.wiki/tools/blast/blastn-output-format-6
     blast_file = open(tmp_path + "/blast_results.out", "r")
@@ -104,67 +193,144 @@ def candidate_regions(intron_length, evalue, tmp_path):
         if not line:
             break
         #parsing blast output
-        blast_results, evalue = parse_blast(line, blast_results)
-        #evalue cut-off
-        if not evalue <= evalue:
-            break
+        blast_results, evalue = parse_blast(line, blast_results, cutoff_evalue)
+
     if blast_results == {}:
+        blast_file.close()
         return 0,0
     else:
-        candidate_regions, number_regions = merge(blast_results, intron_length)
-        #candidate_regions, number_regions = merge_regions(blast_results, cut_off)
-        #print(candidate_regions, number_regions)
+        candidate_regions, number_regions, score_list = merge(blast_results, intron_length)
+        blast_file.close()
+        if number_regions > x:
+            candidate_regions, number_regions = get_x_results(candidate_regions, x, score_list)
         return candidate_regions, number_regions
 
-def extract_seq(region_dic, path, tmp_path):
-    #print(region_dic)
+def extract_seq(region_dic, path, tmp_path, mode):
+
     for key in region_dic:
         #print("blastdbcmd -db " + path + " -dbtype 'nucl' -entry " + key + " -out tmp/" + key + ".fasta -outfmt %f")
         cmd = "blastdbcmd -db " + path + " -dbtype 'nucl' -entry " + key + " -out " + tmp_path + key + ".fasta -outfmt %f"
-        result = subprocess.run(cmd, stderr = subprocess.PIPE, shell=True)
-
-def augustus_ppx(regions, candidatesOutFile, length_extension, profile_path, augustus_ref_species, ass_name, group, tmp_path):
+        starting_subprocess(cmd, mode)
+
+def extract_sequence_from_to(name, file, start, end):
+    #print(name)
+    out = name + ".fasta"
+    if int(start) < 0:
+        start = 0
+    with open(out,"w") as f:
+        for seq_record in SeqIO.parse(file, "fasta"):
+                f.write(">" + str(seq_record.id) + "\n")
+                sequence_length = len(seq_record.seq)
+                if int(end) > sequence_length:
+                    end = sequence_length
+                #for testing only
+                #start = 0
+                #end = len(seq_record.seq)
+                f.write(str(seq_record.seq[int(start):int(end)]) + "\n")
+
+    return out, start, end
+
+def augustus_ppx(regions, candidatesOutFile, length_extension, profile_path, augustus_ref_species, ass_name, group, tmp_path, mode):
     output = open(candidatesOutFile, "w")
 
     for key in regions:
         locations = regions[key]
         counter = 0
         for i in locations:
+            # some variables
             counter += 1
             start = str(i[0] - length_extension)
             end = str(i[1] + length_extension)
             name = key + "_" + str(counter)
-            #print("augustus --proteinprofile=" + profile_path + " --predictionStart=" + start + " --predictionEnd=" + end + " --species=" + augustus_ref_species + " tmp/" + key + ".fasta > tmp/" + key + ".gff")
-
+            # augustus call
             cmd = "augustus --protein=1 --proteinprofile=" + profile_path + " --predictionStart=" + start + " --predictionEnd=" + end + " --species=" + augustus_ref_species + " " + tmp_path + key + ".fasta > " + tmp_path + name + ".gff"
-            result = subprocess.run(cmd, stdout = subprocess.PIPE, shell=True)
+            #print(cmd)
+            starting_subprocess(cmd, 'silent')
+            # extract the predicted amino acid sequences from the augustus output
             cmd = "getAnnoFasta.pl --seqfile=" + tmp_path + key + ".fasta " + tmp_path + name + ".gff"
-            result = subprocess.run(cmd, stderr = subprocess.PIPE, shell=True)
+            starting_subprocess(cmd, mode)
+            # parsing header and sequences
+            try:
+                sequence_file = open(tmp_path + name + ".aa", "r")
+                lines = sequence_file.readlines()
+                for line in lines:
+                    if line[0] == ">":
+                        id = line.replace(">", "")
+                        header = ">" + group + "|" + ass_name + "|" + name + "_" + id
+                        output.write(header)
+                    else:
+                        output.write(line)
+                sequence_file.close()
+            except FileNotFoundError:
+                pass
+                #print("No gene found in region with ID" + name + " in species " + ass_name + " , continuing with next region")
+    output.close()
 
-            sequence_file = open(tmp_path + name + ".aa", "r")
-            lines = sequence_file.readlines()
-            for line in lines:
-                if line[0] == ">":
-                    id = line.replace(">", "")
-                    header = ">" + group + "|" + ass_name + "|" + name + "_" + id
-                    output.write(header)
-                else:
-                    output.write(line)
-            sequence_file.close()
+def metaeuk_single(regions, candidatesOutFile, length_extension, ass_name, group, tmp_path, mode, db):
+    output = open(candidatesOutFile, "w")
+    region = open(candidatesOutFile.replace(".candidates.fa", ".regions.txt"), "w")
+    region.write("Conting/scaffold" + "\t" + "start" + "\t" + "end" + "\n")
+
+    for key in regions:
+        locations = regions[key]
+        counter = 0
+        for i in locations:
+            #some variables
+            counter += 1
+            start = str(i[0] - length_extension)
+            end = str(i[1] + length_extension)
+            name = key + "_" + str(counter)
+            file, start, end = extract_sequence_from_to(tmp_path + name, tmp_path + key + ".fasta", start, end)
+            region.write(file + "\t" + str(start) + "\t" + str(end))
+            #metaeuk call
+            cmd = "metaeuk easy-predict " + file + " " + db + " " + tmp_path + name + " " +  tmp_path + "/metaeuk --min-exon-aa 5 --max-overlap 5 --min-intron 1 --overlap 1"
+            #print(cmd)
+            # other parameters used by BUSCO with metazoa set: --max-intron 130000 --max-seq-len 160000 --min-exon-aa 5 --max-overlap 5 --min-intron 1 --overlap 1
+            starting_subprocess(cmd, mode)
+            # parsing header and sequences
+            try:
+                sequence_file = open(tmp_path + name + ".fas", "r")
+                lines = sequence_file.readlines()
+                #print(lines)
+                id = 0
+                for line in lines:
+                    if line[0] == ">":
+                        id += 1
+                        header = ">" + group + "|" + ass_name + "|" + name + "_" + str(id) + "\n"
+                        output.write(header)
+                    else:
+                        output.write(line)
+                sequence_file.close()
+
+                gff_file = open(tmp_path + name + ".gff", "r")
+                lines = gff_file.readlines()
+                new_lines = []
+                for line in lines:
+                    values = line.split("\t")
+                    values[3] = str(int(values[3]) + int(start))
+                    values[4] = str(int(values[4]) + int(start))
+                    new_lines.append("\t".join(values))
+                gff_file.close()
+                gff_file = open(tmp_path + name + ".gff", "w")
+                for line in new_lines:
+                    gff_file.write(line)
+                gff_file.close()
+            except FileNotFoundError:
+                pass
 
     output.close()
 
 def searching_for_db(assembly_path):
-    #print("test: " + str(assembly_path) + "\n")
+
     db_endings = ['.ndb', '.nhr', '.nin', '.nog', '.nos', '.not', '.nsq', '.ntf', '.nto']
     check = True
     for end in db_endings:
-        #print(assembly_path + end + "\n")
-        check = check and os.path.exists(assembly_path + end)
-        #print(check)
+        if not any(File.endswith(end) for File in os.listdir(assembly_path)):
+            check = False
     return check
 
 def get_distance_biopython(file, matrix):
+    #print(file)
     aln = AlignIO.read(open(file), 'fasta')
     calculator = DistanceCalculator(matrix)
     dm = calculator.get_distance(aln)
@@ -216,16 +382,28 @@ def checkCoOrthologs(candidate_name, best_hit, ref, fdog_ref_species, candidates
     if msaTool == "muscle":
         os.system("muscle -quiet -in " + output_file + " -out " + aln_file)
         #print("muscle -quiet -in " + output_file + " -out " + aln_file)
+        if not os.path.exists(aln_file):
+            print("Muscle failed for " + candidate_name + ". Making MSA with Mafft-linsi.")
+            os.system('mafft --maxiterate 1000 --localpair --anysymbol --quiet ' + output_file + ' > ' + aln_file)
+
     elif msaTool == "mafft-linsi":
         #print("mafft-linsi")
         os.system('mafft --maxiterate 1000 --localpair --anysymbol --quiet ' + output_file + ' > ' + aln_file)
 
-    #d_ref = get_distance(aln_file, best_hit, ref)
-    #d = get_distance(aln_file, best_hit, candidate_name)
-    distances = get_distance_biopython(aln_file, matrix)
+    try:
+        distances = get_distance_biopython(aln_file, matrix)
+        distance_hit_query = distances[best_hit, candidate_name]
+        distance_ref_hit = distances[best_hit, ref]
+        #print(distances)
+    except ValueError:
+        pass
+        #print("Failure in distance computation, Candidate  %s will be rejected" % candidate_name)
+        return 0, "NaN", "NaN"
 
-    distance_hit_query = distances[best_hit, candidate_name]
-    distance_ref_hit = distances[best_hit, ref]
+
+
+    #distance_hit_query = distances[best_hit, candidate_name]
+    #distance_ref_hit = distances[best_hit, ref]
 
     if distance_ref_hit < distance_hit_query:
         #accepted
@@ -235,7 +413,7 @@ def checkCoOrthologs(candidate_name, best_hit, ref, fdog_ref_species, candidates
         #rejected
         return 0, distance_ref_hit, distance_hit_query
 
-def backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, evalue_cut_off, taxa, searchTool, checkCo, msaTool, matrix, dataPath, filter, tmp_path):
+def backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, evalue_cut_off, taxa, searchTool, checkCo, msaTool, matrix, dataPath, filter, tmp_path, mode):
     # the backward search uses the genes predicted from augustus and makes a blastp search
     #the blastp search is against all species that are part of the core_ortholog group if the option --strict was chosen or only against the ref taxa
     seedDic = getSeedInfo(fasta_path)
@@ -248,10 +426,11 @@ def backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, eva
         try:
             id_ref = seedDic[fdog_ref_species]
         except KeyError:
-            print("The fDOG reference species isn't part of the core ortholog group, ... exciting")
+            #print("The fDOG reference species isn't part of the core ortholog group, ... exciting")
             return 0, seed
         if searchTool == "blast":
-            os.system("blastp -db " + blast_dir_path + fdog_ref_species + "/" + fdog_ref_species + " -outfmt '6 sseqid qseqid evalue' -max_target_seqs 10 -out " + tmp_path + "blast_" + fdog_ref_species + " -evalue " + str(evalue_cut_off) + " -query " + candidatesOutFile)
+            cmd = "blastp -db " + blast_dir_path + fdog_ref_species + "/" + fdog_ref_species + " -outfmt '6 sseqid qseqid evalue' -max_target_seqs 10 -out " + tmp_path + "blast_" + fdog_ref_species + " -evalue " + str(evalue_cut_off) + " -query " + candidatesOutFile
+            starting_subprocess(cmd, mode)
         else:
             print("diamonds are the girls best friends")
             ##### diamond call
@@ -265,45 +444,46 @@ def backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, eva
             id, gene, evalue = (line.replace("\n", "")).split("\t")
             gene_name = gene.split("|")[2]
             if gene_name != old_name:
-                print("candidate:%s"%(gene_name))
-                print("blast-hit:%s"%(id))
+                print("candidate:%s"%(gene_name)) if mode == "debug" else ""
+                print("blast-hit:%s"%(id)) if mode == "debug" else ""
                 min = float(evalue)
                 if id in id_ref:
                     orthologs.append(gene)
-                    print("\thitting\n")
+                    print("\thitting\n") if mode == "debug" else ""
                 else:
                     if checkCo == True:
                         for i in id_ref:
-                            print("Best hit %s differs from reference sequence %s! Doing further checks\n"%(id, i))
+                            print("Best hit %s differs from reference sequence %s! Doing further checks\n"%(id, i)) if mode == "debug" else ""
                             co_orthologs_result, distance_ref_hit, distance_hit_query = checkCoOrthologs(gene_name, id, i, fdog_ref_species, candidatesOutFile, msaTool, matrix, dataPath, tmp_path)
                             if co_orthologs_result == 1:
-                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tAccepting\n"%(distance_hit_query, distance_ref_hit))
+                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tAccepting\n"%(distance_hit_query, distance_ref_hit)) if mode == "debug" else ""
                                 orthologs.append(gene)
                             elif co_orthologs_result == 0:
-                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tRejecting\n"%(distance_hit_query, distance_ref_hit))
+                                if distance_ref_hit != "NaN":
+                                    print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tRejecting\n"%(distance_hit_query, distance_ref_hit)) if mode == "debug" else ""
                     else:
-                        print("\tnothitting\n")
+                        print("\tnothitting\n") if mode == "debug" else ""
             elif (gene_name == old_name) and float(evalue) == min and gene_name not in orthologs:
                 if id in id_ref:
                     orthologs.append(gene)
-                    print("\thitting\n")
+                    print("\thitting\n") if mode == "debug" else ""
                 else:
                     if checkCo == True:
                         for i in id_ref:
-                            print("Best hit %s differs from reference sequence %s! Doing further checks\n"%(id, i))
+                            print("Best hit %s differs from reference sequence %s! Doing further checks\n"%(id, i)) if mode == "debug" else ""
                             co_orthologs_result, distance_ref_hit, distance_hit_query = checkCoOrthologs(gene_name, id, i, fdog_ref_species, candidatesOutFile, msaTool, matrix, dataPath, tmp_path)
                             if co_orthologs_result == 1:
-                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tAccepting\n"%(distance_hit_query, distance_ref_hit))
+                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tAccepting\n"%(distance_hit_query, distance_ref_hit)) if mode == "debug" else ""
                                 orthologs.append(gene)
                             elif co_orthologs_result == 0:
-                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tRejecting\n"%(distance_hit_query, distance_ref_hit))
+                                print("\t Distance query - blast hit: %6.4f, Distance blast hit - reference: %6.4f\tRejecting\n"%(distance_hit_query, distance_ref_hit)) if mode == "debug" else ""
                     else:
-                        print("\tnot hitting\n")
+                        print("\tnot hitting\n") if mode == "debug" else ""
             old_name = gene_name
 
 
         if orthologs == []:
-            print("No hit in the backward search, ...exciting")
+            #print("No hit in the backward search, ...exciting")
             return 0, seed
 
     else:
@@ -328,15 +508,16 @@ def backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, eva
         orthologs = set({})
 
         for species in seed:
-            print("backward search in species " + species + "\n")
+            print("backward search in species %s\n" %species)
             orthologs_new = set({})
             try:
                 id_ref = seedDic[species]
             except KeyError:
-                print("The species " + species + " isn't part of the core ortholog group, ... exciting")
+                #print("The species " + species + " isn't part of the core ortholog group, ... exciting")
                 return 0, seed
 
-            os.system("blastp -db " + blast_dir_path + species + "/" + species + " -outfmt '6 sseqid qseqid evalue' -max_target_seqs 10 -seg " + filter + " -out " + tmp_path + "/blast_" + species + " -evalue " + str(evalue_cut_off) + " -query " + candidatesOutFile)
+            cmd = "blastp -db " + blast_dir_path + species + "/" + species + " -outfmt '6 sseqid qseqid evalue' -max_target_seqs 10 -seg " + filter + " -out " + tmp_path + "/blast_" + species + " -evalue " + str(evalue_cut_off) + " -query " + candidatesOutFile
+            starting_subprocess(cmd, mode)
             alg_file = open(tmp_path + "/blast_" + species, "r")
             lines = alg_file.readlines()
             alg_file.close()
@@ -355,23 +536,54 @@ def backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, eva
 
             #print(species)
             #print(orthologs_new)
+            #print(orthologs)
             if species == fdog_ref_species:
                 orthologs = orthologs_new
             else:
                 orthologs = orthologs & orthologs_new
-                if orthologs == {}:
-                    print("No ortholog was found with option --strict")
+                if len(orthologs) == 0:
+                    #print("No ortholog was found with option --strict")
                     return 0, seed
 
-
-
     #print(orthologs)
+    orthologs = set(orthologs)
     return list(orthologs), seed
 
-def addSequences(sequenceIds, candidate_fasta, core_fasta, output, name, species_list, refBool, tmp_path):
-    #print("addSequences")
-    #print(sequenceIds)
+def addRef(output, core_fasta, species_list):
     #print(species_list)
+    output_file = open(output, "a+")
+    seq_records_core = readFasta(core_fasta)
+    seq_records_core = list(seq_records_core)
+    for species in species_list:
+        for entry_core in seq_records_core:
+            if species in entry_core.id:
+                output_file.write(">" + entry_core.id + "\n")
+                output_file.write(str(entry_core.seq) + "\n")
+    output_file.close()
+
+def addSeq(output, seq_list):
+    output_file = open(output, "a+")
+
+    for item in seq_list:
+        #print(item)
+        candidate_fasta = item[1]
+        sequenceIds = item[0]
+        if sequenceIds == 0 or sequenceIds == []:
+            continue
+        seq_records_candidate = readFasta(candidate_fasta)
+        seq_records_candidate = list(seq_records_candidate)
+        for entry_candidate in seq_records_candidate:
+            if entry_candidate.id in sequenceIds:
+                if entry_candidate.id == sequenceIds[0]:
+                    output_file.write(">" + entry_candidate.id + "|1" + "\n")
+                    output_file.write(str(entry_candidate.seq) + "\n")
+                else:
+                    output_file.write(">" + entry_candidate.id + "|0" + "\n")
+                    output_file.write(str(entry_candidate.seq) + "\n")
+    output_file.close()
+
+def addSequences(sequenceIds, candidate_fasta, core_fasta, output, name, species_list, refBool, tmp_path):
+
     output_file = open(output, "a+")
     if refBool == False:
         seq_records_core = readFasta(core_fasta)
@@ -382,20 +594,24 @@ def addSequences(sequenceIds, candidate_fasta, core_fasta, output, name, species
                     output_file.write(">" + entry_core.id + "\n")
                     output_file.write(str(entry_core.seq) + "\n")
 
-    seq_records_candidate = readFasta(candidate_fasta)
-    seq_records_candidate = list(seq_records_candidate)
-    for entry_candidate in seq_records_candidate:
-        #print(entry_candidate.id)
-        #print(sequenceIds)
-        if entry_candidate.id in sequenceIds:
-            output_file.write(">" + entry_candidate.id + "\n")
-            output_file.write(str(entry_candidate.seq) + "\n")
+    if sequenceIds != 0:
+        seq_records_candidate = readFasta(candidate_fasta)
+        seq_records_candidate = list(seq_records_candidate)
+        for entry_candidate in seq_records_candidate:
+            if entry_candidate.id in sequenceIds:
+                if entry_candidate.id == sequenceIds[0]:
+                    output_file.write(">" + entry_candidate.id + "|1" + "\n")
+                    output_file.write(str(entry_candidate.seq) + "\n")
+                else:
+                    output_file.write(">" + entry_candidate.id + "|0" + "\n")
+                    output_file.write(str(entry_candidate.seq) + "\n")
     output_file.close()
     return 0
 
 def createFasInput(orthologsOutFile, mappingFile):
     with open(orthologsOutFile, "r") as f:
         fas_seed_id = (f.readline())[1:-1]
+        #fas_seed_id = fas_seed_id.split("|")[0]
 
     mappingFile = open(mappingFile, "a+")
 
@@ -404,18 +620,22 @@ def createFasInput(orthologsOutFile, mappingFile):
         ncbi_id = (seq.id.split("@"))[1]
         mappingFile.write(seq.id + "\t" + "ncbi" + ncbi_id + "\n")
 
-
+    mappingFile.close()
     return fas_seed_id
 
 def cleanup(tmp, tmp_path):
     if tmp == False:
-        os.system('rm -r ' + tmp_path)
-
-def checkOptions():
-    pass
-    #muss ich unbedingt noch ergänzen wenn ich alle möglichen input Optionen implementiert habe!!!
+        timeout = time.time() + 60*1
+        while os.path.exists(tmp_path):
+            shutil.rmtree(tmp_path, ignore_errors=True)
+            if time.time() > timeout:
+                print("tmp folder could not be removed!")
+                break
 
 def coorthologs(candidate_names, tmp_path, candidatesFile, fasta, fdog_ref_species, msaTool, matrix):
+    if len(candidate_names) == 1:
+        return candidate_names
+
     candidates = readFasta(candidatesFile)
     ref = readFasta(fasta)
 
@@ -431,18 +651,19 @@ def coorthologs(candidate_names, tmp_path, candidatesFile, fasta, fdog_ref_speci
             f.write(str(record.seq) +  "\n")
             break
 
+    already_written = []
     for record in candidates:
         for name in candidate_names:
-            if name in record.id:
-                f.write(">" + name + "\n")
-                f.write(str(record.seq) + "\n")
+            if name == record.id:
+                if name not in already_written:
+                    f.write(">" + record.id + "\n")
+                    f.write(str(record.seq) + "\n")
+                    already_written.append(name)
     f.close()
 
     if msaTool == "muscle":
         os.system("muscle -quiet -in " + out + " -out " + aln_file)
-        #print("muscle -quiet -in " + output_file + " -out " + aln_file)
     elif msaTool == "mafft-linsi":
-        #print("mafft-linsi")
         os.system('mafft --maxiterate 1000 --localpair --anysymbol --quiet ' + out + ' > ' + aln_file)
 
     distances = get_distance_biopython(aln_file, matrix)
@@ -452,19 +673,160 @@ def coorthologs(candidate_names, tmp_path, candidatesFile, fasta, fdog_ref_speci
 
     for name in candidate_names:
         distance = distances[ref_id , name]
-        if distance < min_dist:
+        if distance <= min_dist:
             min_dist = distance
             min_name = name
 
-    checked = []
-
+    checked = [min_name]
 
     for name in candidate_names:
-        if distances[min_name , name] < distances[min_name , ref_id]:
+        if name == min_name:
+            pass
+        elif distances[min_name , name] <= distances[min_name , ref_id]:
             checked.append(name)
 
     return checked
 
+def clean_fas(path, file_type):
+    file = open(path, "r")
+    lines = file.readlines()
+    file.close()
+    file = open(path,"w")
+
+    for line in lines:
+        if file_type == 'domains':
+            long_id, remain = line.split("#")
+            id = long_id.split("|")[0]
+            new_line = id + "#" + remain
+        else:
+            long_id, remain = line.split("\t", 1)
+            id = long_id.split("|")[0]
+            new_line = id + "\t" + remain
+
+        file.write(new_line)
+    file.close()
+
+def ortholog_search_tblastn(args):
+    (asName, out, assemblyDir, consensus_path, augustus_ref_species, group, length_extension, average_intron_length, evalue, strict, fdog_ref_species, msaTool, matrix, dataPath, filter, mode, fasta_path, profile_path, taxa, searchTool, checkCoorthologs, gene_prediction, metaeuk_db) = args
+    output = []
+    cmd = 'mkdir ' + out + '/tmp/' + asName
+    starting_subprocess(cmd, 'silent')
+    tmp_path = out + "tmp/" + asName + "/"
+    candidatesOutFile = tmp_path + group + ".candidates.fa"
+    #orthologsOutFile = out + "/" + group + ".extended.fa"
+    fasOutFile = out + "/" + group
+    #mappingFile = out + "/tmp/" + group + ".mapping.txt"
+
+    output.append("Searching in species " + asName + "\n")
+    assembly_path = assemblyDir + "/" + asName + "/" + asName + ".fa"
+    db_path = assemblyDir + "/" + asName + "/blast_dir/" + asName + ".fa"
+    blast_dir_path = assemblyDir + "/" + asName + "/blast_dir/"
+    db_check = searching_for_db(blast_dir_path)
+
+    if db_check == 0:
+        cmd = 'makeblastdb -in ' + assembly_path + ' -dbtype nucl -parse_seqids -out ' + db_path
+        starting_subprocess(cmd, mode)
+
+    #makes a tBLASTn search against database
+    #codon table argument [-db_gencode int_value], table available ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
+    cmd = 'tblastn -db ' + db_path + ' -query ' + consensus_path + ' -outfmt "6 sseqid sstart send evalue qstart qend score " -evalue ' + str(evalue) + ' -out ' + tmp_path + '/blast_results.out'
+    time_tblastn_start = time.time()
+    exit_code = starting_subprocess(cmd, mode, 3600)
+    time_tblastn_end = time.time()
+    time_tblastn = time_tblastn_end - time_tblastn_start
+    if exit_code == 1:
+        output.append("The tblastn search takes too long for species %s. Skipping species ..." % asName)
+        return [], candidatesOutFile, output
+
+    output.append("Time tblastn %s in species %s" % (str(time_tblastn), asName))
+
+    regions, number_regions = candidate_regions(average_intron_length, evalue, tmp_path)
+    if regions == 0:
+        #no candidate regions are available, no ortholog can be found
+        output.append("No candidate region found for species %s!\n" % asName)
+        return [], candidatesOutFile, output
+
+    else:
+        output.append(str(number_regions) + " candiate region(s) were found for species %s.\n" % asName)
+        extract_seq(regions, db_path, tmp_path, mode)
+
+
+    if gene_prediction == "augustus":
+        ############### make Augustus PPX search ###################################
+        time_augustus_start = time.time()
+        augustus_ppx(regions, candidatesOutFile, length_extension, profile_path, augustus_ref_species, asName, group, tmp_path, mode)
+        time_augustus_end = time.time()
+        time_augustus = time_augustus_end - time_augustus_start
+        output.append("Time augustus: %s species %s \n" % (str(time_augustus), asName))
+    else:
+        time_metaeuk_start = time.time()
+        if metaeuk_db == '':
+            db = fasta_path
+        else:
+            db = metaeuk_db
+        metaeuk_single(regions, candidatesOutFile, length_extension, asName, group, tmp_path, mode, db)
+        time_metaeuk_end = time.time()
+        time_metaeuk = time_metaeuk_end - time_metaeuk_start
+        output.append("Time metaeuk: %s species %s \n" % (str(time_metaeuk), asName))
+
+    ################# backward search to filter for orthologs###################
+    if int(os.path.getsize(candidatesOutFile)) <= 0:
+        #print("No genes found at candidate regions\n")
+        return [], candidatesOutFile, output
+
+    reciprocal_sequences, taxa = backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, evalue, taxa, searchTool, checkCoorthologs, msaTool, matrix, dataPath, filter, tmp_path, mode)
+
+    if reciprocal_sequences == 0:
+        if regions != 0:
+            output.append("No ortholog fulfilled the reciprocity criteria for species %s.\n" % asName)
+        return [], candidatesOutFile, output
+    else:
+        reciprocal_sequences = coorthologs(reciprocal_sequences, tmp_path, candidatesOutFile, fasta_path, fdog_ref_species, msaTool, matrix)
+
+    return reciprocal_sequences, candidatesOutFile, output
+
+def blockProfiles(core_path, group, mode, out):
+
+    ######################## paths ################################
+    msa_path = core_path + "/" + group +"/"+ group + ".aln"
+    check_path(msa_path)
+    profile_path = out + "/tmp/" + group + ".prfl"
+
+    ######################## block profile #####################################
+
+    print("Building a block profile ...")
+    cmd = 'msa2prfl.pl ' + msa_path + ' --setname=' + group + ' >' + profile_path
+    starting_subprocess(cmd, 'silent')
+
+    if int(os.path.getsize(profile_path)) > 0:
+        print("\t ...finished \n")
+    else:
+        print("Building block profiles failed. Using prepareAlign to convert alignment\n")
+        new_path = core_path + group +"/"+ group + "_new.aln"
+        cmd = 'prepareAlign < ' + msa_path + ' > ' + new_path
+        starting_subprocess(cmd, mode)
+        cmd = 'msa2prfl.pl ' + new_path + ' --setname=' + group + ' >' + profile_path
+        starting_subprocess(cmd, 'silent')
+        print(" \t ...finished \n")
+
+    return profile_path
+
+def consensusSequence(core_path, group, mode, out):
+
+    ######################## paths ################################
+    hmm_path = core_path + "/" + group +"/hmm_dir/"+ group + ".hmm"
+    check_path(hmm_path)
+    consensus_path = out + "/tmp/" + group + ".con"
+
+    ######################## consensus sequence ################################
+    #make a majority-rule consensus sequence with the tool hmmemit from hmmer
+    print("Building a consensus sequence")
+    cmd = 'hmmemit -c -o' + consensus_path + ' ' + hmm_path
+    starting_subprocess(cmd, mode)
+    print("\t ...finished\n")
+
+    return consensus_path
+
 class Logger(object):
     def __init__(self, file):
         self.file = file
@@ -478,24 +840,23 @@ def write(self, message):
     def flush(self):
         pass
 
-
 def main():
 
-    #################### handle user input ########################################
-
-    version = '0.0.1'
+    #################### handle user input #####################################
 
+    start = time.time()
+    version = '0.1.3'
+    ################### initialize parser ######################################
     parser = argparse.ArgumentParser(description='You are running fdog.assembly version ' + str(version) + '.')
     parser.add_argument('--version', action='version', version=str(version))
-
+    ################## required arguments ######################################
     required = parser.add_argument_group('Required arguments')
     required.add_argument('--gene', help='Core_ortholog group name. Folder inlcuding the fasta file, hmm file and aln file has to be located in core_orthologs/',
                             action='store', default='', required=True)
-    required.add_argument('--augustusRefSpec', help='augustus reference species', action='store', default='', required=True)
-    required.add_argument('--refSpec', help='Reference taxon for fDOG.', action='store', default='', required=True)
-
+    required.add_argument('--refSpec', help='Reference taxon/taxa for fDOG.', action='store', nargs="+", default='', required=True)
+    ################## optional arguments ######################################
     optional = parser.add_argument_group('Optional arguments')
-    optional.add_argument('--avIntron', help='average intron length of the assembly species in bp (default: 5000)',action='store', default=5000, type=int)
+    optional.add_argument('--avIntron', help='average intron length of the assembly species in bp (default: 50000)',action='store', default=50000, type=int)
     optional.add_argument('--lengthExtension', help='length extension of the candidate regions in bp (default:5000)', action='store', default=5000, type=int)
     optional.add_argument('--assemblyPath', help='Path for the assembly directory', action='store', default='')
     optional.add_argument('--tmp', help='tmp files will not be deleted', action='store_true', default = False)
@@ -508,34 +869,34 @@ def main():
     optional.add_argument('--msaTool', help='Choose between mafft-linsi or muscle for the multiple sequence alignment. DEFAULT: muscle', choices=['mafft-linsi', 'muscle'], action='store', default='muscle')
     optional.add_argument('--checkCoorthologsRef', help='During the final ortholog search, accept an ortholog also when its best hit in the reverse search is not the core ortholog itself, but a co-ortholog of it', action='store_true', default=False)
     optional.add_argument('--scoringmatrix', help='Choose a scoring matrix for the distance criteria used by the option --checkCoorthologsRef. DEFAULT: blosum62', choices=['identity', 'blastn', 'trans', 'benner6', 'benner22', 'benner74', 'blosum100', 'blosum30', 'blosum35', 'blosum40', 'blosum45', 'blosum50', 'blosum55', 'blosum60', 'blosum62', 'blosum65', 'blosum70', 'blosum75', 'blosum80', 'blosum85', 'blosum90', 'blosum95', 'feng', 'fitch', 'genetic', 'gonnet', 'grant', 'ident', 'johnson', 'levin', 'mclach', 'miyata', 'nwsgappep', 'pam120', 'pam180', 'pam250', 'pam30', 'pam300', 'pam60', 'pam90', 'rao', 'risler', 'structure'], action='store', default='blosum62')
-    optional.add_argument('--coreTaxa', help='List of core taxa used during --strict', action='store', default='')
-    optional.add_argument('--filter', help='Switch the low complexity filter for the blast search on.', action='store', default='no')
+    optional.add_argument('--coreTaxa', help='List of core taxa used during --strict', action='store', nargs="+", default=[])
+    #optional.add_argument('--filter', help='Switch the low complexity filter for the blast search on.', action='store', default='no')
     optional.add_argument('--fasoff', help='Turn OFF FAS support', action='store_true', default=False)
     optional.add_argument('--pathFile', help='Config file contains paths to data folder (in yaml format)', action='store', default='')
-    optional.add_argument('--searchTaxon', help='Search Taxon name', action='store', default='')
+    optional.add_argument('--searchTaxa', help='List of Taxa to search in', action='store', nargs="+", default=[])
     optional.add_argument('--silent', help='Output will only be written into the log file', action='store_true', default=False)
-
+    optional.add_argument('--debug', help='Stdout and Stderr from fdog.assembly and every used tool will be printed', action='store_true', default=False)
+    optional.add_argument('--force', help='Overwrite existing output files', action='store_true', default=False)
+    optional.add_argument('--append', help='Append the output to existing output files', action='store_true', default=False)
+    optional.add_argument('--parallel', help= 'The ortholog search of multiple species will be done in parallel', action='store_true', default=False)
+    optional.add_argument('--augustus', help= 'Gene prediction is done by using the tool Augustus PPX', action='store_true', default=False)
+    optional.add_argument('--augustusRefSpec', help='augustus reference species', action='store', default='')
+    optional.add_argument('--metaeukDb', help='path to metaeuk reference database', action='store', default='')
     args = parser.parse_args()
 
     # required
     group = args.gene
-    augustus_ref_species = args.augustusRefSpec
     fdog_ref_species = args.refSpec
     #paths user input
     assemblyDir = args.assemblyPath
     dataPath = args.dataPath
     core_path = args.coregroupPath
-    out = args.out + "/"
+    out = args.out
     pathFile = args.pathFile
     #I/O
     tmp = args.tmp
     strict = args.strict
     checkCoorthologs = args.checkCoorthologsRef
-    filter = args.filter
-    if filter == True or filter == 'yes':
-        filter = 'yes'
-    else:
-        filter = 'no'
     #others
     average_intron_length = args.avIntron
     length_extension = args.lengthExtension
@@ -544,35 +905,38 @@ def main():
     msaTool = args.msaTool
     matrix = args.scoringmatrix
     taxa = args.coreTaxa
-    if taxa == '':
-        taxa =[]
-    else:
-        taxa = taxa.split(",")
     fasoff = args.fasoff
-    searchTaxon = args.searchTaxon
+    searchTaxa = args.searchTaxa
     silent = args.silent
+    debug = args.debug
+    force = args.force
+    append = args.append
+    parallel = args.parallel
+    augustus_ref_species = args.augustusRefSpec
+    metaeuk_db = args.metaeukDb
 
-    ###################### How to handling std output ##########################
-    # if silent == True:
-    #     print(out + "fdog.log \n")
-    #     f = open(out + "fdog.log", "a+")
-    #     sys.stdout = f
-    # else:
-    #     print(out + "fdog.log \n")
-    #     sys.stdout = Logger(out)
-
-    try:
-        f = open(out + "fdog.log", "a+")
-    except FileNotFoundError:
-        f = open(out + "fdog.log", "w")
-
+    #gene prediction tool
+    augustus = args.augustus
+    if augustus == True:
 
-    if silent == True:
-        sys.stderr = f
-        sys.stdout = f
+        if augustus_ref_species == '':
+            print("Augustus reference species is required when using Augustus as gene prediction tool")
+            return 1
+        gene_prediction = "augustus"
     else:
-        sys.stdout = Logger(f)
+        gene_prediction = "metaeuk"
 
+    # output modes
+    if debug == True and silent == True:
+        print("It's not possible to use booth modes, please restart and use --debug or --silent")
+        return 1
+    else:
+        if debug == True:
+            mode = 'debug'
+        elif silent == True:
+            mode = 'silent'
+        else:
+            mode = 'normal'
 
     #checking paths
     if dataPath == '':
@@ -590,178 +954,183 @@ def main():
             except:
                 dataPath = 'config'
 
-    if assemblyDir == '':
-        assemblyDir = dataPath + '/assembly_dir/'
     if out == '':
         out = os.getcwd()
-    if core_path == '':
-        core_path = out + '/core_orthologs/'
-
-
-    # user input has to be checked here before fDOGassembly continues
-
-    assembly_names = os.listdir(assemblyDir)
-
-
-
-    ########################## some variables ##################################
-
-    refBool = False # checks if sequences of reference species were already part of the extended.fa file
-    ########### paths ###########
-
-    msa_path = core_path + "/" + group +"/"+ group + ".aln"
-    hmm_path = core_path + "/" + group +"/hmm_dir/"+ group + ".hmm"
-    fasta_path = core_path + "/" + group +"/"+ group + ".fa"
-    consensus_path = out + "/tmp/" + group + ".con"
-    profile_path = out + "/tmp/" + group + ".prfl"
-
-    ###################### create tmp folder ###################################
-
-    os.system('mkdir ' + out + '/tmp')
-
-    ######################## consensus sequence ################################
-
-    #make a majority-rule consensus sequence with the tool hmmemit from hmmer
-    print("Building a consensus sequence \n")
-    os.system('hmmemit -c -o' + consensus_path + ' ' + hmm_path)
-    print("consensus sequence is finished\n")
-
-    ######################## block profile #####################################
-
-    print("Building a block profile \n")
-    cmd = 'msa2prfl.pl ' + msa_path + ' --setname=' + group + ' >' + profile_path
-    #os.system('msa2prfl.pl ' + msa_path + ' --setname=' + group + ' >' + profile_path)
-    result = subprocess.run(cmd, stderr = subprocess.PIPE, shell=True)
-
-    #print(os.path.getsize(profile_path))
-    if int(os.path.getsize(profile_path)) > 0:
-        print("block profile is finished \n")
     else:
-        print("Building block profiles failed. Using prepareAlign to convert alignment\n")
-        new_path = core_path + group +"/"+ group + "_new.aln"
-        cmd = 'prepareAlign < ' + msa_path + ' > ' + new_path
-        result = subprocess.run(cmd, stderr = subprocess.PIPE, shell=True)
-        cmd = 'msa2prfl.pl ' + new_path + ' --setname=' + group + ' >' + profile_path
-        result = subprocess.run(cmd, stderr = subprocess.PIPE, shell=True)
-        print("block profile is finished \n")
-
-
-    searchBool = False
-
-    for asName in assembly_names:
-        if searchBool == True:
-            break
-        if searchTaxon != '' and searchBool == False:
-            asName = searchTaxon
-            searchBool = True
-
-        ################### path definitions ###################################
-        os.system('mkdir ' + out + '/tmp/' + asName)
-        tmp_path = out + "/tmp/" + asName + "/"
-        candidatesOutFile = tmp_path + group + ".candidates.fa"
-        if searchTaxon != '':
-            orthologsOutFile = out + "/" + group + "_" + asName + ".extended.fa"
-            fasOutFile = out + "/" + group + "_" + asName
-            mappingFile = tmp_path + group + "_" + asName + ".mapping.txt"
+        if out[-1] != "/":
+            out = out + "/"
+        check_path(out)
+
+    if os.path.exists(out + '/' + group):
+        if append != True and force != True:
+            print("Output folder for group " + group + " exists already. Please choose --force or --append.")
+            sys.exit()
+        elif force == True:
+            shutil.rmtree(out + '/' + group, ignore_errors=True)
+            refBool = False
+            os.system('mkdir ' + out + '/' + group + ' >/dev/null 2>&1')
+            out = out + '/' + group + '/'
+        elif append == True:
+            out = out + '/' + group + '/'
+            refBool = True
         else:
-            orthologsOutFile = out + "/" + group + ".extended.fa"
-            fasOutFile = out + "/" + group
-            mappingFile = out + "/tmp/" + group + ".mapping.txt"
-
-
-        print("Searching in species " + asName + "\n")
-        assembly_path = assemblyDir + "/" + asName + "/" + asName + ".fa"
-        db_path = assemblyDir + "/" + asName + "/blast_dir/" + asName + ".fa"
-    ######################## tBLASTn ###########################################
-
-    #database anlegen
+            refBool = False # checks if sequences of reference species were already part of the extended.fa file
+    else:
+        os.system('mkdir ' + out + '/' + group + ' >/dev/null 2>&1')
+        out = out + '/' + group + '/'
+        refBool = False
 
-        db_check = searching_for_db(db_path)
-        #print(assembly_path)
-        if db_check == 0:
-            print("creating a blast data base \n")
-            os.system('makeblastdb -in ' + assembly_path + ' -dbtype nucl -parse_seqids -out ' + db_path)
-            print("database is finished \n")
-        else:
-            print('blast data base exists already, continuing...')
+    if core_path == '':
+        core_path = out + '/core_orthologs/'
+    else:
+        if not core_path.endswith('/'):
+            core_path = core_path + '/'
+        check_path(core_path)
 
+    if assemblyDir == '':
+        assemblyDir = dataPath + '/assembly_dir/'
+    check_path(assemblyDir)
 
-    #make a tBLASTn search against the new database
-    #codon table argument [-db_gencode int_value], table available ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
+    if metaeuk_db != '':
+        check_path(metaeuk_db)
 
-        print("tBLASTn search against data base")
-        os.system('tblastn -db ' + db_path + ' -query ' + consensus_path + ' -outfmt "6 sseqid sstart send evalue qstart qend " -out ' + tmp_path + '/blast_results.out')
-        print("tBLASTn search is finished")
 
-    ################### search for candidate regions and extract seq ###########
+    try:
+        f = open(out + "/fdog.log", "a+")
+    except FileNotFoundError:
+        f = open(out + "/fdog.log", "w")
 
-    # parse blast and filter for candiate regions
-        regions, number_regions = candidate_regions(average_intron_length, evalue, tmp_path)
+    ################## How to handle std output and std error ##################
 
-        if regions == 0:
-            #no candidat region are available, no ortholog can be found
-            print("No candidate region found")
-            continue
+    if mode == 'silent':
+        sys.stderr = f
+        sys.stdout = f
+    else:
+        sys.stdout = Logger(f)
 
+    ########################### other variables ################################
+    if searchTaxa == []:
+        assembly_names = os.listdir(assemblyDir)
+    else:
+        if len(searchTaxa) > 1:
+            assembly_names = os.listdir(assemblyDir)
+            for Taxon in searchTaxa:
+                if Taxon not in assembly_names:
+                    print("Taxon %s is not in the assembly_dir" % Taxon)
+                    sys.exit()
+            assembly_names = searchTaxa
         else:
-            print(str(number_regions) + " candiate regions were found. Extracting sequences...")
-            extract_seq(regions, db_path, tmp_path)
-
-    ############### make Augustus PPX search ###################################
-        print("starting augustus ppx \n")
-        augustus_ppx(regions, candidatesOutFile, length_extension, profile_path, augustus_ref_species, asName, group, tmp_path)
-        print("augustus is finished \n")
+            if searchTaxa[0] in os.listdir(assemblyDir):
+                assembly_names = searchTaxa
+            elif os.path.isfile(searchTaxa[0]):
+                with open(searchTaxa[0]) as file:
+                    lines = file.readlines()
+                    assembly_names = [line.rstrip() for line in lines]
+            else:
+                print("Input %s for search Taxa is not in the assembly_dir or an existing file" % searchTaxa[0])
 
-    ################# backward search to filter for orthologs###################
-        reciprocal_sequences, taxa = backward_search(candidatesOutFile, fasta_path, strict, fdog_ref_species, evalue, taxa, searchTool, checkCoorthologs, msaTool, matrix, dataPath, filter, tmp_path)
+    ################################# paths ####################################
 
+    fasta_path = core_path + "/" + group +"/"+ group + ".fa"
+    check_path(fasta_path)
+    tmp_folder = out + "/tmp"
 
-        if reciprocal_sequences == 0:
-            print("No ortholog fulfilled the reciprocity criteria")
-            if searchTaxon == '':
-                continue
-            else:
-                cleanup(tmp, tmp_path)
-                return 1
+    ########### is/are fDOG reference species part of ortholog group? ##########
 
-    ################## checking accepted genes for co-orthologs ##########################
-        print(reciprocal_sequences)
-        reciprocal_sequences = coorthologs(reciprocal_sequences, tmp_path, candidatesOutFile, fasta_path, fdog_ref_species, msaTool, matrix)
+    fdog_ref_species = check_ref_sepc(fdog_ref_species, fasta_path)
 
+    ###################### create tmp folder ###################################
 
+    cmd = 'mkdir ' + out + '/tmp'
+    starting_subprocess(cmd, 'silent')
 
-    ################ add sequences to extended.fa in the output folder##########
-        addSequences(reciprocal_sequences, candidatesOutFile, fasta_path, orthologsOutFile, group, taxa, refBool, tmp_path)
-        refBool = True
+    print("Gene: " + group)
+    print("fDOG reference species: " + fdog_ref_species + " \n")
 
-    ############### make Annotation with FAS ###################################
-        if searchTaxon != '' and fasoff == False:
-            fas_seed_id = createFasInput(orthologsOutFile, mappingFile)
-            # bug in calcFAS when using --tsv, have to wait till it's fixed before I can use the option
-            os.system('mkdir ' + tmp_path + 'anno_dir')
-            os.system('calcFAS --seed ' + fasta_path + ' --query ' + orthologsOutFile + ' --annotation_dir ' + tmp_path + 'anno_dir --bidirectional --phyloprofile ' + mappingFile + ' --seed_id "' + fas_seed_id + '" --out_dir ' + out + ' --out_name ' + group + '_' + asName )
+    ###################### preparations ########################################
 
+    if augustus == True:
+        group_computation_time_start = time.time()
+        consensus_path = consensusSequence(core_path, group, mode, out)
+        profile_path = blockProfiles(core_path, group, mode, out)
+        group_computation_time_end = time.time()
+        time_group = group_computation_time_end - group_computation_time_start
+    else:
+        #print("test")
+        profile_path = ""
+        group_computation_time_start = time.time()
+        consensus_path = consensusSequence(core_path, group, mode, out)
+        #concatenate core_group sequences if metaeuk should be run without tblastn
+        group_computation_time_end = time.time()
+        time_group = group_computation_time_end - group_computation_time_start
+
+
+    ###################### ortholog search #####################################
+
+    ortholog_sequences = []
+    time_ortholog_start = time.time()
+
+    if parallel == True:
+        ##################### parallel computation #############################
+        calls = []
+        cpus = mp.cpu_count()
+        pool = mp.Pool(cpus)
+        for asName in assembly_names:
+            calls.append([asName, out, assemblyDir, consensus_path, augustus_ref_species, group, length_extension, average_intron_length, evalue, strict, fdog_ref_species, msaTool, matrix, dataPath, filter, mode, fasta_path, profile_path, taxa, searchTool, checkCoorthologs, gene_prediction, metaeuk_db])
+
+        results = (pool.imap_unordered(ortholog_search_tblastn, calls))
+        pool.close()
+        pool.join()
+        for i in results:
+            ortholog_sequences.append([i[0], i[1]])
+            for k in i[2]:
+                print(k)
+    else:
+        ###################### computation species wise ################
+        for asName in assembly_names:
+            args = [asName, out, assemblyDir, consensus_path, augustus_ref_species, group, length_extension, average_intron_length, evalue, strict, fdog_ref_species, msaTool, matrix, dataPath, filter, mode, fasta_path, profile_path, taxa, searchTool, checkCoorthologs, gene_prediction, metaeuk_db]
+            reciprocal_sequences, candidatesOutFile, output_ortholog_search = ortholog_search_tblastn(args)
+            ortholog_sequences.append([reciprocal_sequences, candidatesOutFile])
+            for k in output_ortholog_search:
+                print(k)
+
+    time_ortholog_end = time.time()
+    time_ortholog = time_ortholog_end - time_ortholog_start
+
+    ################## preparing output ########################################
+    orthologsOutFile = out + "/" + group + ".extended.fa"
+
+    if taxa == []:
+        taxa = [fdog_ref_species]
+    if append == True:
+        addSeq(orthologsOutFile, ortholog_sequences)
+    else:
+        addRef(orthologsOutFile, fasta_path, taxa)
+        addSeq(orthologsOutFile, ortholog_sequences)
+    mappingFile = out + "/tmp/" + group + ".mapping.txt"
 
-    if refBool == False and searchTaxon == '':
-        print("No orthologs found. Exciting ...")
-        cleanup(tmp, tmp_path)
-        return 1
+    if fasoff == False:
+        fas = time.time()
+        print("Calculating FAS scores ...")
 
-    if fasoff == False and searchTaxon == '':
         tmp_path = out + '/tmp/'
         fas_seed_id = createFasInput(orthologsOutFile, mappingFile)
-        # bug in calcFAS when using --tsv, have to wait till it's fixed before I can use the option
-        os.system('calcFAS --seed ' + fasta_path + ' --query ' + orthologsOutFile + ' --annotation_dir ' + tmp_path + 'anno_dir --bidirectional --phyloprofile ' + mappingFile + ' --seed_id "' + fas_seed_id + '" --out_dir ' + out + ' --out_name ' + group )
-
+        cmd = 'fas.run --seed ' + fasta_path + ' --query ' + orthologsOutFile + ' --annotation_dir ' + tmp_path + 'anno_dir --bidirectional --tsv --phyloprofile ' + mappingFile + ' --seed_id "' + fas_seed_id + '" --out_dir ' + out + ' --out_name ' + group
+        starting_subprocess(cmd, 'silent')
+        clean_fas(out + group + "_forward.domains", 'domains')
+        clean_fas(out + group + "_reverse.domains", 'domains')
+        clean_fas(out + group + ".phyloprofile", 'phyloprofile')
+        print("\t ...finished \n")
 
     ################# remove tmp folder ########################################
-    if searchTaxon != '':
-        cleanup(tmp, tmp_path)
-    else:
-        cleanup(tmp, out + "/tmp/")
+    end = time.time()
+    time_fas = (end - fas) if fasoff == False else 0  # 'fas' is only assigned when FAS scoring actually ran; avoid NameError with fasoff
+    print("fDOG-Assembly finished completely in " + str(end-start) + "seconds.")
+    print("Group preparation: %s \t Ortholog search: %s \t FAS: %s \n" % (str(time_group), str(time_ortholog), str(time_fas)))
+    sys.stdout = sys.__stdout__
 
     f.close()
-
+    cleanup(tmp, tmp_folder)
 
 if __name__ == '__main__':
     main()
diff --git a/fdog/mergeAssemblyOutput.py b/fdog/mergeAssemblyOutput.py
deleted file mode 100644
index ea6e084..0000000
--- a/fdog/mergeAssemblyOutput.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# -*- coding: utf-8 -*-
-
-#######################################################################
-# Copyright (C) 2020 Vinh Tran
-#
-#  This script is used to merge all output files (.extended.fa, .phyloprofile,
-#  _forward.domains, _reverse.domains) in a given directory into one file each.
-#
-#  This script is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  GNU General Public License <http://www.gnu.org/licenses/> for
-#  more details
-#
-#  Contact: hannah.muelbaier@stud.uni-frankfurt.de
-#
-#######################################################################
-
-import sys
-import os
-from os import listdir as ldir
-import argparse
-from pathlib import Path
-
-def main():
-    version = '0.0.1'
-    parser = argparse.ArgumentParser(description='You are running fdog.mergeAssemblyOutput version ' + str(version) + '.')
-    parser.add_argument('-i','--input', help='Input directory, where all single output (.extended.fa, .phyloprofile, _forward.domains, _reverse.domains) can be found',
-                        action='store', default='', required=True)
-    parser.add_argument('-o','--output', help='Output name', action='store', default='', required=True)
-    parser.add_argument('-c', '--cleanup', help='Deletes the merged output files from fDOG', action='store_true', default=False)
-    args = parser.parse_args()
-
-    directory = args.input
-    out = args.output
-    cleanup = args.cleanup
-    if not os.path.exists(os.path.abspath(directory)):
-        sys.exit('%s not found' % directory)
-    else:
-        directory = os.path.abspath(directory)
-
-    phyloprofile = None
-    set_phylo = set()
-    domains_0 = None
-    set_domains_f = set()
-    domains_1 = None
-    set_domains_r = set()
-    ex_fasta = None
-    set_fasta = set()
-    header_bool = False
-    for infile in ldir(directory):
-        if infile.endswith('.phyloprofile') and not infile == out + '.phyloprofile':
-            if not phyloprofile:
-                phyloprofile = open(out + '.phyloprofile', 'w')
-                phyloprofile.write('geneID\tncbiID\torthoID\tFAS_F\tFAS_B\n')
-            with open(directory + '/' + infile, 'r') as reader:
-                lines = reader.readlines()
-                for line in lines:
-                    if line != 'geneID\tncbiID\torthoID\tFAS_F\tFAS_B\n' and line not in set_phylo:
-                        phyloprofile.write(line)
-                if len(lines) > 1:
-                    set_phylo = set(lines)
-            if cleanup == True:
-                os.remove(directory + '/' + infile)
-        elif infile.endswith('_forward.domains') and not infile == out + '_forward.domains':
-            if not domains_0:
-                domains_0 = open(out + '_forward.domains', 'w')
-            with open(directory + '/' + infile, 'r') as reader:
-                lines = reader.readlines()
-                for line in lines:
-                    if line not in set_domains_f:
-                        domains_0.write(line)
-                if len(lines) > 1:
-                    set_domains_f = set(lines)
-            if cleanup == True:
-                os.remove(directory + '/' + infile)
-        elif infile.endswith('_reverse.domains') and not infile == out + '_reverse.domains':
-            if not domains_1:
-                domains_1 = open(out + '_reverse.domains', 'w')
-            with open(directory + '/' + infile, 'r') as reader:
-                lines = reader.readlines()
-                for line in lines:
-                    if line not in set_domains_r:
-                        domains_1.write(line)
-                if len(lines) > 1:
-                    set_domains_r = set(lines)
-            if cleanup == True:
-                os.remove(directory + '/' + infile)
-        elif infile.endswith('.extended.fa') and not infile == out + '.extended.fa':
-            if not ex_fasta:
-                ex_fasta = open(out + '.extended.fa', 'w')
-            with open(directory + '/' + infile, 'r') as reader:
-                lines = reader.readlines()
-                header = set()
-                #print(set_fasta)
-                for line in lines:
-                    if line[0] == ">":
-                        header.add(line)
-                        if line not in set_fasta:
-                            ex_fasta.write(line)
-                            header_bool = True
-                        else:
-                            header_bool = False
-                    else:
-                        if header_bool == True:
-                            ex_fasta.write(line)
-                set_fasta = header
-            if cleanup == True:
-                os.remove(directory + '/' +infile)
-
-    if phyloprofile:
-        phyloprofile.close()
-    if domains_0:
-        domains_0.close()
-    if domains_1:
-        domains_1.close()
-    if ex_fasta:
-        ex_fasta.close()
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/fdog/runMulti.py b/fdog/runMulti.py
index 7d73cdd..be552a7 100644
--- a/fdog/runMulti.py
+++ b/fdog/runMulti.py
@@ -107,10 +107,9 @@ def compileCore(options, seeds, inFol, cpu, outpath):
     for seed in seeds:
         seqFile = [inFol + '/' + seed]
         seqName = getSeedName(seed)
-
         if not os.path.exists('%s/core_orthologs/%s/hmm_dir/%s.hmm' % (outpath, seqName, seqName)):
             (basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, mute) = prepare(seqFile + [seqName] + options, 'core')
-            coreCompilationJobs.append([basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, assemblyArgs, mute])
+            coreCompilationJobs.append([basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, mute])
     if len(coreCompilationJobs) > 0:
         pool = mp.Pool(cpu)
         coreOut = []
@@ -132,7 +131,7 @@ def searchOrtho(options, seeds, inFol, cpu, outpath):
     for seed in seeds:
         seqFile = [inFol + '/' + seed]
         seqName = getSeedName(seed)
-        (basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, assemblyArgs, mute) = prepare(seqFile + [seqName] + options, 'ortholog')
+        (basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, mute) = prepare(seqFile + [seqName] + options, 'ortholog')
         if mute == True:
             print(seed)
         else:
@@ -331,14 +330,6 @@ def main():
     optional.add_argument('--debug', help='Set this flag to obtain more detailed information about the programs actions', action='store_true', default=False)
     optional.add_argument('--silentOff', help='Show more output to terminal', action='store_true', default=False)
 
-    assembly_options = parser.add_argument_group('Assembly options')
-    assembly_options.add_argument('--assembly', help='Turn on support of assembly input files',action='store_true', default=False)
-    assembly_options.add_argument('--assemblyFile', help='Input file containing the assembly seqeunce', action='store', default='')
-    assembly_options.add_argument('--augustusRefSpec', help='augustus reference species', action='store', default='')
-    assembly_options.add_argument('--avIntron', help='average Intron length of the assembly species', action='store', default=5000, type=int)
-    assembly_options.add_argument('--lengthExtension', help='length extension of the candidate region', action='store', default=5000, type=int)
-    assembly_options.add_argument('--searchTool', help='Choose between BLAST or Diamond as a alignemnt search tool. DEFAULT: BLAST', choices=['blast', 'diamond'], action='store', default='blast')
-    assembly_options.add_argument('--scoringmatrix', help ='Choose a scoring matrix for the distance criteria used by the option --checkCoorthologsRef. DEFAULT: blosum62', choices=['identity', 'blastn', 'trans', 'benner6', 'benner22', 'benner74', 'blosum100', 'blosum30', 'blosum35', 'blosum40', 'blosum45', 'blosum50', 'blosum55', 'blosum60', 'blosum62', 'blosum65', 'blosum70', 'blosum75', 'blosum80', 'blosum85', 'blosum90', 'blosum95', 'feng', 'fitch', 'genetic', 'gonnet', 'grant', 'ident', 'johnson', 'levin', 'mclach', 'miyata', 'nwsgappep', 'pam120', 'pam180', 'pam250', 'pam30', 'pam300', 'pam60', 'pam90', 'rao', 'risler', 'structure'], action='store', default='blosum62')
     ### get arguments
     args = parser.parse_args()
 
@@ -420,15 +411,6 @@ def main():
         silent = False
     else:
         silent = True
-       
-    #fdog_goes_assembly arguments
-    assembly = args.assembly
-    assemblyFile = args.assemblyFile
-    augustusRefSpec = args.augustusRefSpec
-    avIntron = args.avIntron
-    lengthExtension = args.lengthExtension
-    searchTool = args.searchTool
-    matrix = args.scoringmatrix
 
     ### check fas
     if not fasoff:
diff --git a/fdog/runSingle.py b/fdog/runSingle.py
index 7e0f858..f239f90 100644
--- a/fdog/runSingle.py
+++ b/fdog/runSingle.py
@@ -67,13 +67,13 @@ def getfdogInfo(fdogPath, infoType):
         exit('%s not found' % (fdogPath + '/bin/oneSeq.pl'))
 
 def runSingle(args):
-    (basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, assemblyArgs, mute) = args
+    (basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, mute) = args
     # basic command
     (fdogPath, seqFile, seqName, refspec, minDist, maxDist, coreOrth) = basicArgs
     cmd = 'perl %s/bin/oneSeq.pl -seqFile=%s -seqName=%s -refspec=%s' % (fdogPath, seqFile, seqName, refspec)
     # add paths
-    (outpath, hmmpath, blastpath, searchpath, weightpath, assemblypath) = pathArgs
-    cmd = cmd + ' -outpath=%s -hmmpath=%s -blastpath=%s -searchpath=%s -weightpath=%s -assemblypath=%s' % (outpath, hmmpath, blastpath, searchpath, weightpath, assemblypath)
+    (outpath, hmmpath, blastpath, searchpath, weightpath) = pathArgs
+    cmd = cmd + ' -outpath=%s -hmmpath=%s -blastpath=%s -searchpath=%s -weightpath=%s' % (outpath, hmmpath, blastpath, searchpath, weightpath)
     # add other I/O options
     (append, force, noCleanup, group, blast, db) = ioArgs
     if append == True:
@@ -165,28 +165,7 @@ def runSingle(args):
         cmd = cmd + ' -debug'
     if silent == True:
         cmd = cmd + ' -silent'
-    # add assembly options
-    (assembly, assemblyFile, augustusRefSpec, avIntron, lengthExtension, searchTool, matrix, dataPath) = assemblyArgs
-    if assembly == True:
-        cmd = cmd + ' -assembly'
-        cmd = cmd + ' -reuseCore'
-        if not augustusRefSpec == '':
-            cmd = cmd + ' -augustusRefSpec=%s' % augustusRefSpec
-        else:
-            sys.exit('An augutus reference species is requiered by using the option --assembly')
-        if not avIntron == '':
-            cmd = cmd + ' -avIntron=%s' % avIntron
-        if not lengthExtension == '':
-            cmd = cmd + ' -lengthExtension=%s' % lengthExtension
-        if not assemblyFile == '':
-            cmd = cmd + ' -assemblyFile=%s' % assemblyFile
-        if not searchTool == '':
-            cmd = cmd + ' -searchTool=%s' % searchTool
-        if not matrix == '':
-            cmd = cmd + ' -scoringmatrix=%s' % matrix
-        if not dataPath == '':
-            cmd = cmd + ' -dataPath=%s' % dataPath
-    #print(cmd)
+    # print(cmd)
     if mute == True:
         cmd = cmd + ' > /dev/null 2>&1'
     try:
@@ -238,8 +217,6 @@ def main():
     optional_paths.add_argument('--searchpath', help='Path for the search taxa directory', action='store', default='')
     optional_paths.add_argument('--weightpath', help='Path for the pre-calculated feature annotion directory', action='store', default='')
     optional_paths.add_argument('--pathFile', help='Config file contains paths to data folder (in yaml format)', action='store', default='')
-    optional_paths.add_argument('--assemblypath', help='Path for the assembly directory', action='store', default='')
-
 
     addtionalIO = parser.add_argument_group('Other I/O options')
     addtionalIO.add_argument('--append', help='Append the output to existing output files', action='store_true', default=False)
@@ -326,14 +303,6 @@ def main():
     optional.add_argument('--debug', help='Set this flag to obtain more detailed information about the programs actions', action='store_true', default=False)
     optional.add_argument('--silentOff', help='Show more output to terminal', action='store_true', default=False)
 
-    assembly_options = parser.add_argument_group('Assembly options')
-    assembly_options.add_argument('--assembly', help='Turn on support of assembly input files',action='store_true', default=False)
-    assembly_options.add_argument('--assemblyFile', help='Input file containing the assembly seqeunce', action='store', default='')
-    assembly_options.add_argument('--augustusRefSpec', help='augustus reference species', action='store', default='')
-    assembly_options.add_argument('--avIntron', help='average Intron length of the assembly species', action='store', default=5000, type=int)
-    assembly_options.add_argument('--lengthExtension', help='length extension of the candidate region', action='store', default=5000, type=int)
-    assembly_options.add_argument('--searchTool', help='Choose between BLAST or Diamond as a alignemnt search tool. DEFAULT: BLAST', choices=['blast', 'diamond'], action='store', default='blast')
-    assembly_options.add_argument('--scoringmatrix', help ='Choose a scoring matrix for the distance criteria used by the option --checkCoorthologsRef. DEFAULT: blosum62', choices=['identity', 'blastn', 'trans', 'benner6', 'benner22', 'benner74', 'blosum100', 'blosum30', 'blosum35', 'blosum40', 'blosum45', 'blosum50', 'blosum55', 'blosum60', 'blosum62', 'blosum65', 'blosum70', 'blosum75', 'blosum80', 'blosum85', 'blosum90', 'blosum95', 'feng', 'fitch', 'genetic', 'gonnet', 'grant', 'ident', 'johnson', 'levin', 'mclach', 'miyata', 'nwsgappep', 'pam120', 'pam180', 'pam250', 'pam30', 'pam300', 'pam60', 'pam90', 'rao', 'risler', 'structure'], action='store', default='blosum62')
     ### get arguments
     args = parser.parse_args()
 
@@ -353,7 +322,6 @@ def main():
     searchpath = args.searchpath
     weightpath = args.weightpath
     pathFile = args.pathFile
-    assemblypath = args.assemblypath
 
     # other I/O arguments
     append = args.append
@@ -415,15 +383,6 @@ def main():
     else:
         silent = True
 
-    #fdog_goes_assembly arguments
-    assembly = args.assembly
-    assemblyFile = args.assemblyFile
-    augustusRefSpec = args.augustusRefSpec
-    avIntron = args.avIntron
-    lengthExtension = args.lengthExtension
-    searchTool = args.searchTool
-    matrix = args.scoringmatrix
-
     ### get fdog and data path
     dataPath = ''
     fdogPath = os.path.realpath(__file__).replace('/runSingle.py','')
@@ -471,29 +430,19 @@ def main():
             except:
                 sys.exit('weightpath not found in %s' % pathFile)
 
-    if assemblypath == '':
-        assemblypath = dataPath + '/assembly_dir'
-        if dataPath == 'config':
-            try:
-                assemblypath = cfg['assemblypath']
-            except:
-                sys.exit('assemblypath not found in %s' % pathFile)
-        if assembly == True:
-            searchpath = assemblypath
-
     ### check input arguments
     seqFile, hmmpath, blastpath, searchpath, weightpath = checkInput([fdogPath, seqFile, refspec, outpath, hmmpath, blastpath, searchpath, weightpath])
     # group arguments
     basicArgs = [fdogPath, seqFile, seqName, refspec, minDist, maxDist, coreOrth]
     ioArgs = [append, force, noCleanup, group, blast, db]
-    pathArgs = [outpath, hmmpath, blastpath, searchpath, weightpath, assemblypath]
+    pathArgs = [outpath, hmmpath, blastpath, searchpath, weightpath]
     coreArgs = [coreOnly, reuseCore, coreTaxa, coreStrict, CorecheckCoorthologsRef, coreRep, coreHitLimit, distDeviation]
     fasArgs = [fasoff, countercheck, coreFilter, minScore]
     orthoArgs = [strict, checkCoorthologsRef, rbh, rep, ignoreDistance, lowComplexityFilter, evalBlast, evalHmmer, evalRelaxfac, hitLimit, autoLimit, scoreThreshold, scoreCutoff, aligner, local, glocal, searchTaxa]
     otherArgs = [cpu, hyperthread, checkOff, debug, silent]
 
     ### run fdog
-    runSingle([basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, assemblyArgs, False])
+    runSingle([basicArgs, ioArgs, pathArgs, coreArgs, orthoArgs, fasArgs, otherArgs, False])
 
     ### create PhyloProfile config file
     createConfigPP(outpath, seqName, refspec)
diff --git a/fdog/setup/install_lib.sh b/fdog/setup/install_lib.sh
index e5ca4a9..1eaf176 100755
--- a/fdog/setup/install_lib.sh
+++ b/fdog/setup/install_lib.sh
@@ -85,6 +85,7 @@ dependenciesUbuntu=(
   perl-doc
   locales
   lib32z1
+  augustus
 )
 
 dependenciesMac=(
@@ -94,6 +95,7 @@ dependenciesMac=(
   mafft
   brewsci/bio/muscle
   blast
+  augustus
 )
 
 if [ "$sys" == "Darwin" ]; then
@@ -108,7 +110,11 @@ else
   sudo apt-get update -y
   for i in "${dependenciesUbuntu[@]}"; do
     echo $i
-    sudo apt-get install -y -qq $i > /dev/null
+    if [ "$i" == "augustus" ]; then  # '[' is a command, so the spaces around the brackets are mandatory
+      sudo apt-get install -y augustus > /dev/null  # -y: stdout is discarded, so a confirmation prompt would hang unseen
+    else
+      sudo apt-get install -y -qq $i > /dev/null
+    fi
   done
 fi
 
@@ -119,6 +125,7 @@ dependencies=(
   mafft
   muscle
   blastn
+  augustus
 )
 
 for i in "${dependencies[@]}"; do
diff --git a/fdog/setup/setup.sh b/fdog/setup/setup.sh
index 7515ed2..28eb851 100755
--- a/fdog/setup/setup.sh
+++ b/fdog/setup/setup.sh
@@ -315,6 +315,8 @@ mafft
 muscle
 clustalw
 blastp
+augustus
+tblastn
 )
 
 for i in "${dependencies[@]}"; do
@@ -324,6 +326,14 @@ for i in "${dependencies[@]}"; do
       tool="clustalw2"
     fi
   fi
+  if [ "$tool" == "tblastn" ]; then
+    requiredver="2.9.0"  # minimum BLAST+ version accepted without a warning
+    currentver="$(tblastn -version 2> /dev/null | head -n1 | cut -d" " -f2 | sed 's/+//g')"  # empty when tblastn is missing
+    t=$(printf '%s\n' "$requiredver" "$currentver" | sort -V | head -n1)  # smaller of the two versions
+    if [ -n "$currentver" ] && [ "$t" != "$requiredver" ]; then  # warn only if installed < required (equal is fine)
+      echo -e "\t\e[31mWARNING BLAST+ needs an update to at least version ${requiredver}!\e[0m"
+    fi
+  fi
   if [ -z "$(which $tool)" ]; then
     echo -e "\t\e[31mWARNING $tool not found!\e[0m"
     flag=1
diff --git a/fdog/setup/setup_conda.sh b/fdog/setup/setup_conda.sh
index cf1bc6d..73b8573 100755
--- a/fdog/setup/setup_conda.sh
+++ b/fdog/setup/setup_conda.sh
@@ -116,6 +116,7 @@ dependencies=(
   mafft # for linsi
   muscle
   fasta36
+  augustus #for fdog.assembly
 )
 
 for i in "${dependencies[@]}"; do
@@ -134,6 +135,8 @@ for i in "${dependencies[@]}"; do
       fi
     elif [ "$tool" = "fasta36" ]; then
       conda install -y -c bioconda fasta3
+    elif [ "$tool" = "augustus" ]; then
+      conda install -y -c bioconda augustus
     else
       conda install -y -c bioconda $i
     fi
@@ -363,6 +366,8 @@ clustalw
 mafft
 muscle
 fasta3
+augustus
+tblastn
 )
 for i in "${condaPkgs[@]}"; do
   if [[ -z $(conda list | $grepprog "$i ") ]]; then
@@ -375,6 +380,13 @@ for i in "${condaPkgs[@]}"; do
       progname="hmmsearch"
     elif [ "$i" == "fasta3" ]; then
       progname="fasta36"
+    elif [ "$i" == "tblastn" ]; then
+      requiredver="2.9.0"  # minimum BLAST+ version accepted without a warning
+      currentver="$(tblastn -version 2> /dev/null | head -n1 | cut -d" " -f2 | sed 's/+//g')"  # empty when tblastn is missing
+      t=$(printf '%s\n' "$requiredver" "$currentver" | sort -V | head -n1)  # smaller of the two versions
+      if [ -n "$currentver" ] && [ "$t" != "$requiredver" ]; then  # warn only if installed < required (equal is fine)
+        echo -e "\t\e[31mWARNING BLAST+ needs an update to at least version ${requiredver}!\e[0m"
+      fi
     fi
     if [ -z "$(which $progname)" ]; then
       echo -e "\t\e[31m$i could not be installed\e[0m"