Skip to content

Commit 43f6e8e

Browse files
author
Steven Wingett
committed
GitHub issue #1: fastq_screen in --bisulfite mode now runs Bismark in --ambiguous mode to identify multi-mapping reads. Prepared FastQ Screen for release of v0.12.2
1 parent 279946c commit 43f6e8e

File tree

2 files changed

+41
-6
lines changed

2 files changed

+41
-6
lines changed

RELEASE_NOTES.txt

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1+
Release notes for FastQ Screen v0.12.2 (16 August 2018)
2+
-------------------------------------------------------
3+
For bisulfite mapping, bismark is now run in --ambiguous
4+
mode to identify multi-mapping reads.
5+
6+
7+
18
Release notes for FastQ Screen v0.12.1 (24 July 2018)
29
-----------------------------------------------------
3-
410
Added --inverse option for filtering files.
511

612
Improved results display format.

fastq_screen

+34-5
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use Cwd;
1212
use Data::Dumper;
1313

1414

15-
our $VERSION = "0.12.1";
15+
our $VERSION = "0.12.2";
1616

1717
###########################################################################
1818
###########################################################################
@@ -1036,6 +1036,8 @@ sub commify {
10361036
return scalar reverse $text;
10371037
}
10381038

1039+
1040+
10391041
#Returns a suitably formatted datestamp
10401042
sub datestampGenerator {
10411043
my @now = localtime();
@@ -1049,6 +1051,7 @@ sub datestampGenerator {
10491051
}
10501052

10511053

1054+
10521055
#Uses Bismark to map an input file to a specified genome
10531056
#Results stored in the @index_genomes array
10541057
sub bisulfite_mapping {
@@ -1057,6 +1060,7 @@ sub bisulfite_mapping {
10571060
my $bismark_command;
10581061
my $sam_output_option = '';
10591062
$sam_output_option = '--sam' unless ( defined $path_to_samtools );
1063+
my $db_name = $$library[0];
10601064

10611065
#Determine whether to execute bowtie1 or bowtie2
10621066
my $prefix = $library->[0];
@@ -1069,7 +1073,7 @@ sub bisulfite_mapping {
10691073
$path_to_aligner_command = join('/', @elements) . '/';
10701074
$path_to_aligner_command = '--path_to_bowtie ' . $path_to_aligner_command;
10711075
}
1072-
$bismark_command = "$path_to_bismark $sam_output_option $path_to_aligner_command --bowtie1 $bowtie_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library->[1] $file 1>/dev/null 2>$error_filename";
1076+
$bismark_command = "$path_to_bismark $sam_output_option $path_to_aligner_command --ambiguous --bowtie1 $bowtie_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library->[1] $file 1>/dev/null 2>$error_filename";
10731077
} else { #Using Bowtie2
10741078

10751079
if($path_to_bowtie ne 'bowtie2'){
@@ -1079,7 +1083,7 @@ sub bisulfite_mapping {
10791083
$path_to_aligner_command = join('/', @elements) . '/';
10801084
$path_to_aligner_command = '--path_to_bowtie ' . $path_to_aligner_command; #Bismark uses --path_to_bowtie for Bowtie1 and Bowtie2
10811085
}
1082-
$bismark_command = "$path_to_bismark $sam_output_option $path_to_aligner_command --bowtie2 $bowtie2_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library->[1] $file 1>/dev/null 2>$error_filename";
1086+
$bismark_command = "$path_to_bismark $sam_output_option $path_to_aligner_command --ambiguous --bowtie2 $bowtie2_opts $illumina_flag --non_directional --prefix $prefix --output_dir $outdir $library->[1] $file 1>/dev/null 2>$error_filename";
10831087
}
10841088

10851089
!system($bismark_command) or die "Could not run Bismark with command: '$bismark_command'.\n";
@@ -1148,7 +1152,32 @@ sub bisulfite_mapping {
11481152
die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
11491153
}
11501154
}
1151-
1155+
1156+
#Multi-mapping reads have been written to "ambiguous" FASTQ files
1157+
#Extract the IDs and report as multi-mapping
1158+
my $ambiguous_file = basename($file);
1159+
$ambiguous_file = $outdir . "$db_name.$ambiguous_file" . "_ambiguous_reads.fq.gz";
1160+
open (AMBIGUOUS_FILE, "gunzip -c $ambiguous_file |") or die "Could not read file '$ambiguous_file ' : $!";
1161+
1162+
while (<AMBIGUOUS_FILE>) {
1163+
my $seqname = $_;
1164+
($seqname) = split(/\./, $seqname);
1165+
$seqname = substr($seqname, 1); #Ignore @
1166+
1167+
#Record twice, so reads from the ambiguous file are considered as multi-mapping
1168+
unless ( defined ${$index_genomes_ref}[$seqname] ) {
1169+
${$index_genomes_ref}[$seqname] = 0; #Initialise - array may have 'gaps'
1170+
}
1171+
1172+
${$index_genomes_ref}[$seqname] = record_hit( ${$index_genomes_ref}[$seqname], $library_index + 1 );
1173+
${$index_genomes_ref}[$seqname] = record_hit( ${$index_genomes_ref}[$seqname], $library_index + 1 );
1174+
1175+
scalar <AMBIGUOUS_FILE>; #Ignore rest of FASTQ read
1176+
scalar <AMBIGUOUS_FILE>;
1177+
scalar <AMBIGUOUS_FILE>;
1178+
}
1179+
close AMBIGUOUS_FILE or die "Cannot close filehandle on '$ambiguous_file' : $!";
1180+
11521181
#Check the Standard Error and report any errors
11531182
#Bowtie reports the alignment summary to standard error, so ignore the alignment summary
11541183
while (<$error_fh>) {
@@ -1163,14 +1192,14 @@ sub bisulfite_mapping {
11631192
my $bismark_report_file = $mapped_file;
11641193
$bismark_report_file =~ s/\.sam$|\.bam$/_SE_report.txt/;
11651194
unlink $bismark_report_file or die "Could not delete Bismark report file '$bismark_report_file'.\n";
1195+
unlink $ambiguous_file or die "Could not delete Bismark amibiguous reads outputfile '$ambiguous_file'.\n";
11661196
}
11671197

11681198
#Uses Bowtie or Bowtie2 to map an input file to a specified genome
11691199
#Results stored in the @index_genomes array
11701200
sub conventional_mapping {
11711201

11721202
my ( $illumina_flag, $read_length, $library, $file, $error_filename, $index_genomes_ref, $library_index, $error_fh ) = @_;
1173-
11741203
my $aligner_command;
11751204

11761205
#Determine whether to execute bowtie1 or bowtie2

0 commit comments

Comments
 (0)