@@ -12,7 +12,7 @@ use Cwd;
12
12
use Data::Dumper;
13
13
14
14
15
- our $VERSION = " 0.12.1 " ;
15
+ our $VERSION = " 0.12.2 " ;
16
16
17
17
# ##########################################################################
18
18
# ##########################################################################
@@ -1036,6 +1036,8 @@ sub commify {
1036
1036
return scalar reverse $text ;
1037
1037
}
1038
1038
1039
+
1040
+
1039
1041
# Returns a suitably formatted datestamp
1040
1042
sub datestampGenerator {
1041
1043
my @now = localtime ();
@@ -1049,6 +1051,7 @@ sub datestampGenerator {
1049
1051
}
1050
1052
1051
1053
1054
+
1052
1055
# Uses Bismark to map an input file to a specified genome
1053
1056
# Results stored in the @index_genomes array
1054
1057
sub bisulfite_mapping {
@@ -1057,6 +1060,7 @@ sub bisulfite_mapping {
1057
1060
my $bismark_command ;
1058
1061
my $sam_output_option = ' ' ;
1059
1062
$sam_output_option = ' --sam' unless ( defined $path_to_samtools );
1063
+ my $db_name = $$library [0];
1060
1064
1061
1065
# Determine whether to execute bowtie1 or bowtie2
1062
1066
my $prefix = $library -> [0];
@@ -1069,7 +1073,7 @@ sub bisulfite_mapping {
1069
1073
$path_to_aligner_command = join (' /' , @elements ) . ' /' ;
1070
1074
$path_to_aligner_command = ' --path_to_bowtie ' . $path_to_aligner_command ;
1071
1075
}
1072
- $bismark_command = " $path_to_bismark $sam_output_option $path_to_aligner_command --bowtie1 $bowtie_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library ->[1] $file 1>/dev/null 2>$error_filename " ;
1076
+ $bismark_command = " $path_to_bismark $sam_output_option $path_to_aligner_command --ambiguous -- bowtie1 $bowtie_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library ->[1] $file 1>/dev/null 2>$error_filename " ;
1073
1077
} else { # Using Bowtie2
1074
1078
1075
1079
if ($path_to_bowtie ne ' bowtie2' ){
@@ -1079,7 +1083,7 @@ sub bisulfite_mapping {
1079
1083
$path_to_aligner_command = join (' /' , @elements ) . ' /' ;
1080
1084
$path_to_aligner_command = ' --path_to_bowtie ' . $path_to_aligner_command ; # Bismark uses --path_to_bowtie for Bowtie1 and Bowtie2
1081
1085
}
1082
- $bismark_command = " $path_to_bismark $sam_output_option $path_to_aligner_command --bowtie2 $bowtie2_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library ->[1] $file 1>/dev/null 2>$error_filename " ;
1086
+ $bismark_command = " $path_to_bismark $sam_output_option $path_to_aligner_command --ambiguous -- bowtie2 $bowtie2_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library ->[1] $file 1>/dev/null 2>$error_filename " ;
1083
1087
}
1084
1088
1085
1089
!system ($bismark_command ) or die " Could not run Bismark with command: '$bismark_command '.\n " ;
@@ -1148,7 +1152,32 @@ sub bisulfite_mapping {
1148
1152
die " Unexpected combination of read and genome conversion: '$read_conversion ' / '$genome_conversion '\n " ;
1149
1153
}
1150
1154
}
1151
-
1155
+
1156
+ # Multi-mapping reads have been written to "ambiguous" FASTQ files
1157
+ # Extract the IDs and report as multi-mapping
1158
+ my $ambiguous_file = basename($file );
1159
+ $ambiguous_file = $outdir . " $db_name .$ambiguous_file " . " _ambiguous_reads.fq.gz" ;
1160
+ open (AMBIGUOUS_FILE, " gunzip -c $ambiguous_file |" ) or die " Could not read file '$ambiguous_file ' : $! " ;
1161
+
1162
+ while (<AMBIGUOUS_FILE>) {
1163
+ my $seqname = $_ ;
1164
+ ($seqname ) = split (/ \. / , $seqname );
1165
+ $seqname = substr ($seqname , 1); # Ignore @
1166
+
1167
+ # Record twice, so reads from the ambiguous file are considered as multi-mapping
1168
+ unless ( defined ${$index_genomes_ref} [$seqname ] ) {
1169
+ ${$index_genomes_ref} [$seqname ] = 0; # Initialise - array may have 'gaps'
1170
+ }
1171
+
1172
+ ${$index_genomes_ref} [$seqname ] = record_hit( ${$index_genomes_ref} [$seqname ], $library_index + 1 );
1173
+ ${$index_genomes_ref} [$seqname ] = record_hit( ${$index_genomes_ref} [$seqname ], $library_index + 1 );
1174
+
1175
+ scalar <AMBIGUOUS_FILE>; # Ignore rest of FASTQ read
1176
+ scalar <AMBIGUOUS_FILE>;
1177
+ scalar <AMBIGUOUS_FILE>;
1178
+ }
1179
+ close AMBIGUOUS_FILE or die " Cannot close filehandle on '$ambiguous_file ' : $! " ;
1180
+
1152
1181
# Check the Standard Error and report any errors
1153
1182
# Bowtie reports the alignment summary to standard error, so ignore the alignment summary
1154
1183
while (<$error_fh >) {
@@ -1163,14 +1192,14 @@ sub bisulfite_mapping {
1163
1192
my $bismark_report_file = $mapped_file ;
1164
1193
$bismark_report_file =~ s /\. sam$|\. bam$/ _SE_report.txt/ ;
1165
1194
unlink $bismark_report_file or die " Could not delete Bismark report file '$bismark_report_file '.\n " ;
1195
+ unlink $ambiguous_file or die " Could not delete Bismark amibiguous reads outputfile '$ambiguous_file '.\n " ;
1166
1196
}
1167
1197
1168
1198
# Uses Bowtie or Bowtie2 to map an input file to a specified genome
1169
1199
# Results stored in the @index_genomes array
1170
1200
sub conventional_mapping {
1171
1201
1172
1202
my ( $illumina_flag , $read_length , $library , $file , $error_filename , $index_genomes_ref , $library_index , $error_fh ) = @_ ;
1173
-
1174
1203
my $aligner_command ;
1175
1204
1176
1205
# Determine whether to execute bowtie1 or bowtie2
0 commit comments