Skip to content

Add support for Fasta records #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Version 0.7.0 (2020-03-21)

- Add support for Fasta files

# Version 0.6.0 (2018-08-10)

- Make `RecordRefReader` public.
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

# A fast parser for fastq.

This library can process fastq files at about the speed of the
This library can process fast{q,a} files at about the speed of the
coreutils `wc -l` (about 2GB/s on my laptop, `seqan` runs at
about 150MB/s). It also makes it easy to distribute the
processing of fastq records to many cores, without losing much
processing of fast{q,a} records to many cores, without losing much
of the performance.

See the [documentation](https://docs.rs/fastq/) for details and examples.
Expand Down
40 changes: 21 additions & 19 deletions examples/alignment_count.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use fastq::{parse_path, Record};
use std::env::args;
use fastq::{parse_path_fq, Record};
use parasailors as align;
use std::env::args;

extern crate fastq;
extern crate parasailors;
Expand All @@ -12,29 +12,31 @@ const ADAPTER: &'static [u8] = b"AATGATACGGCGACCACCGAGA\
fn main() {
let filename = args().nth(1);
let path = match filename.as_ref().map(String::as_ref) {
None | Some("-") => { None },
Some(name) => Some(name)
None | Some("-") => None,
Some(name) => Some(name),
};

let results = parse_path(path, |parser| {
let results = parse_path_fq(path, |parser| {
let nthreads = 2;
let results: Vec<usize> = parser.parallel_each(nthreads, |record_sets| {
let matrix = align::Matrix::new(align::MatrixType::Identity);
let profile = align::Profile::new(ADAPTER, &matrix);
let mut thread_total = 0;
let results: Vec<usize> = parser
.parallel_each(nthreads, |record_sets| {
let matrix = align::Matrix::new(align::MatrixType::Identity);
let profile = align::Profile::new(ADAPTER, &matrix);
let mut thread_total = 0;

for record_set in record_sets {
for record in record_set.iter() {
let score = align::local_alignment_score(
&profile, record.seq(), 8, 1);
if score > 10 {
thread_total += 1;
for record_set in record_sets {
for record in record_set.iter() {
let score = align::local_alignment_score(&profile, record.seq(), 8, 1);
if score > 10 {
thread_total += 1;
}
}
}
}
thread_total
}).expect("Invalid fastq file");
thread_total
})
.expect("Invalid fastq file");
results
}).expect("Invalid compression");
})
.expect("Invalid compression");
println!("{}", results.iter().sum::<usize>());
}
19 changes: 11 additions & 8 deletions examples/bio-count.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
use bio::io::fastq::{Reader, Record};
use std::io::stdin;
use std::sync::mpsc::sync_channel;
use std::thread::Builder;
use bio::io::fastq::{Reader, Record};

extern crate bio;

fn main() {
let reader = Reader::new(stdin());
let (tx, rx) = sync_channel::<Vec<Record>>(10);

let handle = Builder::new().name("worker".to_string()).spawn(move || {
let mut count: usize = 0;
while let Ok(val) = rx.recv() {
count += val.len();
}
count
}).unwrap();
let handle = Builder::new()
.name("worker".to_string())
.spawn(move || {
let mut count: usize = 0;
while let Ok(val) = rx.recv() {
count += val.len();
}
count
})
.unwrap();

//let mut record = Record::new();
//while let Ok(_) = reader.read(&mut record) {
Expand Down
27 changes: 15 additions & 12 deletions examples/fastq-count-thread.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
use fastq::parse_path;
use fastq::parse_path_fq;
use std::env::args;

extern crate fastq;

fn main() {
let filename = args().nth(1);
let path = match filename.as_ref().map(String::as_ref) {
None | Some("-") => { None },
Some(name) => Some(name)
None | Some("-") => None,
Some(name) => Some(name),
};

parse_path(path, |parser| {
let results: Vec<usize> = parser.parallel_each(1, |record_sets| {
let mut thread_total = 0;
for record_set in record_sets {
thread_total += record_set.len();
}
thread_total
}).expect("Invalid fastq file");
parse_path_fq(path, |parser| {
let results: Vec<usize> = parser
.parallel_each(1, |record_sets| {
let mut thread_total = 0;
for record_set in record_sets {
thread_total += record_set.len();
}
thread_total
})
.expect("Invalid fastq file");
println!("{}", results.iter().sum::<usize>());
}).expect("Invalid compression");
})
.expect("Invalid compression");
}
22 changes: 12 additions & 10 deletions examples/fastq-count.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
use fastq::parse_path;
use fastq::parse_path_fq;
use std::env::args;

extern crate fastq;


fn main() {
let filename = args().nth(1);
let path = match filename.as_ref().map(String::as_ref) {
None | Some("-") => { None },
Some(name) => Some(name)
None | Some("-") => None,
Some(name) => Some(name),
};

let mut total: usize = 0;
parse_path(path, |parser| {
parser.each(|_| {
total += 1;
true
}).expect("Invalid fastq file");
}).expect("Invalid compression");
parse_path_fq(path, |parser| {
parser
.each(|_| {
total += 1;
true
})
.expect("Invalid fastq file");
})
.expect("Invalid compression");
println!("{}", total);
}
16 changes: 9 additions & 7 deletions examples/multiple-files.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
use fastq::{parse_path, each_zipped};
use fastq::{each_zipped, parse_path_fq};
use std::env::args;

extern crate fastq;


fn main() {
let path1 = args().nth(1).expect("Need two input files.");
let path2 = args().nth(2).expect("Need two input files.");

let mut counts = (0u64, 0u64);

parse_path(Some(path1), |parser1| {
parse_path(Some(path2), |parser2| {
parse_path_fq(Some(path1), |parser1| {
parse_path_fq(Some(path2), |parser2| {
each_zipped(parser1, parser2, |rec1, rec2| {
if rec1.is_some() {
counts.0 += 1;
Expand All @@ -20,9 +19,12 @@ fn main() {
counts.1 += 1;
}
(true, true)
}).expect("Invalid record.");
}).expect("Unknown format for file 2.");
}).expect("Unknown format for file 1.");
})
.expect("Invalid record.");
})
.expect("Unknown format for file 2.");
})
.expect("Unknown format for file 1.");

println!("Number of reads: ({}, {})", counts.0, counts.1);
}
4 changes: 0 additions & 4 deletions fuzz/.gitignore

This file was deleted.

8 changes: 8 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,11 @@ path = "fuzz_targets/fuzz_target_1.rs"
[[bin]]
name = "fuzz_target_2"
path = "fuzz_targets/fuzz_target_2.rs"

[[bin]]
name = "fuzz_target_3"
path = "fuzz_targets/fuzz_target_3.rs"

[[bin]]
name = "fuzz_target_4"
path = "fuzz_targets/fuzz_target_4.rs"
15 changes: 9 additions & 6 deletions fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
#![no_main]

use std::io::Cursor;
use fastq::Record;
use std::io::Cursor;

#[macro_use] extern crate libfuzzer_sys;
extern crate fastq;
#[macro_use]
extern crate libfuzzer_sys;
extern crate criterion;

extern crate fastq;

fuzz_target!(|data: &[u8]| {
let reader = Cursor::new(data);
let mut parser = fastq::Parser::new(reader);
let mut parser = fastq::Parser::new(reader, fastq::ParserKind::Fastq);

let mut sum: usize = 0;
let _ = parser.each(|rec| {sum += rec.seq().len(); true});
let _ = parser.each(|rec| {
sum += rec.seq().len();
true
});
criterion::black_box(sum);
});
8 changes: 4 additions & 4 deletions fuzz/fuzz_targets/fuzz_target_2.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#![no_main]

use std::io::Cursor;
use fastq::Record;
use std::io::Cursor;

#[macro_use] extern crate libfuzzer_sys;
#[macro_use]
extern crate libfuzzer_sys;
extern crate fastq;


fuzz_target!(|data: &[u8]| {
let reader = Cursor::new(data);
let mut parser = fastq::Parser::new(reader);
let mut parser = fastq::Parser::new(reader, fastq::ParserKind::Fastq);

let _: Result<Vec<_>, _> = parser.parallel_each(3, |sets| {
for set in sets {
Expand Down
21 changes: 21 additions & 0 deletions fuzz/fuzz_targets/fuzz_target_3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#![no_main]

use fastq::Record;
use std::io::Cursor;

#[macro_use]
extern crate libfuzzer_sys;
extern crate criterion;
extern crate fastq;

// Fuzz entry point for the sequential Fasta code path.
//
// Wraps the fuzzer-supplied bytes in an in-memory `Cursor`, builds a parser
// with `ParserKind::Fasta`, and visits records through `each`, adding up the
// length of every sequence that is yielded.
fuzz_target!(|data: &[u8]| {
    let mut parser = fastq::Parser::new(Cursor::new(data), fastq::ParserKind::Fasta);

    let mut total_len: usize = 0;
    // The value returned by `each` is discarded on purpose: the target only
    // cares that parsing arbitrary input runs to completion.
    // NOTE(review): the callback's `true` presumably means "keep iterating" — confirm
    // against the `each` documentation.
    let _ = parser.each(|record| {
        total_len += record.seq().len();
        true
    });

    // Hand the accumulated length to `black_box` so the summation above is
    // treated as used.
    criterion::black_box(total_len);
});
22 changes: 22 additions & 0 deletions fuzz/fuzz_targets/fuzz_target_4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#![no_main]

use fastq::Record;
use std::io::Cursor;

#[macro_use]
extern crate libfuzzer_sys;
extern crate fastq;

// Fuzz entry point for the multi-threaded Fasta code path.
//
// Wraps the fuzzer-supplied bytes in an in-memory `Cursor`, builds a parser
// with `ParserKind::Fasta`, and drives it through `parallel_each` with `3`
// as the first argument (presumably the worker-thread count — confirm against
// the `parallel_each` documentation).
fuzz_target!(|data: &[u8]| {
    let mut parser = fastq::Parser::new(Cursor::new(data), fastq::ParserKind::Fasta);

    // The result is bound to `_` and dropped: only the act of parsing and
    // touching each record matters here, not the collected per-worker values.
    let _: Result<Vec<_>, _> = parser.parallel_each(3, |record_sets| {
        for record_set in record_sets {
            for record in record_set.iter() {
                // Access the sequence of every record; the slice itself is unused.
                let _ = record.seq();
            }
        }
        true
    });
});
Loading