diff --git a/Cargo.lock b/Cargo.lock index 9ea2854..ec7a7ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -880,6 +880,7 @@ name = "fqkit" version = "0.4.2" dependencies = [ "anyhow", + "atty", "bio", "bzip2", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 0186b0f..b0f1e2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ license = "GPL-3.0" [dependencies] anyhow = "1.0.75" +atty = "0.2.14" bio = "1.5.0" bzip2 = "0.4.4" chrono = "0.4.30" @@ -36,4 +37,4 @@ xz2 = "0.1.7" [profile.release] -strip = true \ No newline at end of file +strip = true diff --git a/src/cli/barcode.rs b/src/cli/barcode.rs index bd1341b..e5890b7 100644 --- a/src/cli/barcode.rs +++ b/src/cli/barcode.rs @@ -4,15 +4,16 @@ use bio::io::fastq; use log::*; use std::collections::HashMap; use std::io::BufRead; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::time::Instant; fn barcode_list(file: &String, rev_comp: bool) -> Result> { - info!("reading from barcode list file: {}", file); - let fp = file_reader(Some(file))?; let mut maps = HashMap::new(); let mut error_flag = ""; + let fp = file_reader(Some(file))?; + info!("reading from barcode list file: {}", file); + if rev_comp { for line in fp.lines().map_while(std::io::Result::ok) { let item = line.split('\t').collect::>(); // barcode => sample @@ -125,31 +126,31 @@ pub fn split_fq( let mut fq_hand = Vec::new(); for (bar_seq, name) in maps { let fq1 = if gzip { - format!("{}/{}_1.fq.gz", outdir, name) + PathBuf::from(outdir).join(format!("{}_1.fq.gz",name)) } else if bzip2 { - format!("{}/{}_1.fq.bz2", outdir, name) + PathBuf::from(outdir).join(format!("{}_1.fq.bz2",name)) } else if xz { - format!("{}/{}_1.fq.xz", outdir, name) + PathBuf::from(outdir).join(format!("{}_1.fq.xz",name)) } else { - format!("{}/{}_1.fq", outdir, name) + PathBuf::from(outdir).join(format!("{}_1.fq",name)) }; let fq2 = if gzip { - format!("{}/{}_2.fq.gz", outdir, name) + PathBuf::from(outdir).join(format!("{}_2.fq.gz",name)) } else if bzip2 { - format!("{}/{}_2.fq.bz2", outdir, name) + PathBuf::from(outdir).join(format!("{}_2.fq.bz2",name)) } else if xz { - format!("{}/{}_2.fq.xz", outdir, name) + PathBuf::from(outdir).join(format!("{}_2.fq.xz",name)) } else { - format!("{}/{}_2.fq", outdir, name) + PathBuf::from(outdir).join(format!("{}_2.fq",name)) }; let bar = if gzip { - format!("{}/{}_barcode.fq.gz", outdir, name) + PathBuf::from(outdir).join(format!("{}_barcode.fq.gz",name)) } else if bzip2 { - format!("{}/{}_barcode.fq.bz2", outdir, name) + PathBuf::from(outdir).join(format!("{}_barcode.fq.bz2",name)) } else if xz { - format!("{}/{}_barcode.fq.xz", outdir, name) + PathBuf::from(outdir).join(format!("{}_barcode.fq.xz",name)) } else { - format!("{}/{}_barcode.fq", outdir, name) + PathBuf::from(outdir).join(format!("{}_barcode.fq",name)) }; let fh1 = fastq::Writer::new(file_writer_append(&fq1, compression_level)?); @@ -159,14 +160,14 @@ pub fn split_fq( fq_hand.push((bar_seq, len, fh1, fh2, fhb)); } - info!("reading from read1 file: {}", big_fq1); - info!("reading from read2 file: {}", big_fq2); - info!("barcode position mode: {}", mode); - let bar_count = fq_hand.len(); let fq1_reader = fastq::Reader::new(file_reader(Some(big_fq1))?); let fq2_reader = fastq::Reader::new(file_reader(Some(big_fq2))?); let (mut read_pair, mut get_pair) = (0u64, 0u64); + + info!("reading from read1 file: {}", big_fq1); + info!("reading from read2 file: {}", big_fq2); + info!("barcode position mode: {}", mode); if mode == 2 { for (rec1, rec2) in fq1_reader diff --git a/src/cli/check.rs b/src/cli/check.rs index 9216838..7ab214b 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -10,15 +10,16 @@ pub fn check_fastq( out: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let (mut total, mut ok_read, mut err_read) = (0, 0, 0); + let fp_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(file) = file { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - let start = Instant::now(); - let (mut total, mut ok_read, mut err_read) = (0, 0, 0); - let fp_reader = file_reader(file).map(fastq::Reader::new)?; if save { let mut out_writer = file_writer(out, compression_level).map(fastq::Writer::new)?; for rec in fp_reader.records().flatten() { diff --git a/src/cli/concat.rs b/src/cli/concat.rs index de6b2fa..346d062 100644 --- a/src/cli/concat.rs +++ b/src/cli/concat.rs @@ -11,16 +11,14 @@ pub fn concat_fqstq_lane( out_r2: &String, compression_level: u32, ) -> Result<(), Error> { - info!("read forward reads for lane list: {}", r1_list); - info!("read forward reads for lane list: {}", r2_list); - info!("outout read1 in file: {}", out_r1); - info!("outout read1 in file: {}", out_r2); let start = Instant::now(); let mut vec1 = vec![]; let mut vec2 = vec![]; let fp1 = file_reader(Some(r1_list))?; let fp2 = file_reader(Some(r2_list))?; + info!("read forward reads for lane list: {}", r1_list); + info!("read forward reads for lane list: {}", r2_list); for r1 in fp1.lines().map_while(Result::ok) { vec1.push(r1); @@ -35,6 +33,9 @@ pub fn concat_fqstq_lane( ); std::process::exit(1); } + + info!("outout read1 in file: {}", out_r1); + info!("outout read1 in file: {}", out_r2); let mut out_writer1 = file_writer(Some(out_r1), compression_level).map(fastq::Writer::new)?; let mut out_writer2 = file_writer(Some(out_r2), compression_level).map(fastq::Writer::new)?; diff --git a/src/cli/cutadapter.rs b/src/cli/cutadapter.rs index f82d16b..79f7593 100644 --- a/src/cli/cutadapter.rs +++ b/src/cli/cutadapter.rs @@ -14,6 +14,8 @@ pub fn cut_adapter( compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); + + let seqfile_reader = file_reader(Some(seqfile)).map(fasta::Reader::new)?; if let Some(file) = input { info!("reading seq from file: {}", seqfile); info!("reading from file: {}", file); @@ -22,8 +24,6 @@ pub fn cut_adapter( } let mut seqs = HashMap::new(); - let seqfile_reader = file_reader(Some(seqfile)).map(fasta::Reader::new)?; - let mut iters = seqfile_reader.records(); while let Some(each) = iters.next() { let rec = each?; diff --git a/src/cli/flatten.rs b/src/cli/flatten.rs index 0eb2b67..a652fa1 100644 --- a/src/cli/flatten.rs +++ b/src/cli/flatten.rs @@ -12,6 +12,8 @@ pub fn flatten_fq( compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); + + let fq_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(file) = file { info!("reading from file: {}", file); } else { @@ -23,8 +25,7 @@ pub fn flatten_fq( error!("error flag numer: {}, flag range [1..15]", flag); std::process::exit(1); } - - let fq_reader = file_reader(file).map(fastq::Reader::new)?; + let mut out_writer = file_writer(out, compression_level)?; let flags = format!("{:b}", flag).chars().rev().collect::>(); let mut fields = vec![]; diff --git a/src/cli/fq2fa.rs b/src/cli/fq2fa.rs index c56c2b9..9d89898 100644 --- a/src/cli/fq2fa.rs +++ b/src/cli/fq2fa.rs @@ -11,17 +11,17 @@ pub fn fq2fa( out: Option<&String>, compression_level: u32, ) -> Result<(), Error> { + let start = Instant::now(); + + let mut num = 0usize; + let fq_reader = fastq::Reader::new(file_reader(file)?); if let Some(file) = file { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - let start = Instant::now(); - let mut num = 0usize; - - let fq_reader = fastq::Reader::new(file_reader(file)?); + let mut fo = fasta::Writer::new(file_writer(out, compression_level)?); - if remove { for rec in fq_reader.records().flatten() { num += 1; diff --git a/src/cli/fqscore.rs b/src/cli/fqscore.rs index 7c9cb2e..9cd0c0c 100644 --- a/src/cli/fqscore.rs +++ b/src/cli/fqscore.rs @@ -11,6 +11,9 @@ pub fn phred_score( to64: bool, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let fq_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(r) = file { info!("read file from: {}", r); } else { @@ -33,10 +36,7 @@ pub fn phred_score( std::process::exit(1); } - let start = Instant::now(); - let fq_reader = file_reader(file).map(fastq::Reader::new)?; let mut fq_writer = file_writer(out, compression_level).map(fastq::Writer::new)?; - for rec in fq_reader.records().flatten() { let mut qual = vec![]; if to33 { diff --git a/src/cli/gcplot.rs b/src/cli/gcplot.rs index c95bdab..da66697 100644 --- a/src/cli/gcplot.rs +++ b/src/cli/gcplot.rs @@ -18,14 +18,15 @@ pub fn gc_content( types: &str, compression_level: u32, ) -> Result<(), Error> { + let start = Instant::now(); + + let fq_reader = file_reader(fqin).map(fastq::Reader::new)?; if let Some(inp) = fqin { info!("reading from file: {}", inp); } else { info!("reading from stdin"); } - let start = Instant::now(); - - let fq_reader = file_reader(fqin).map(fastq::Reader::new)?; + let mut fo = file_writer(output, compression_level)?; let mut df_hash = HashMap::new(); diff --git a/src/cli/grep.rs b/src/cli/grep.rs index 3772cc8..b929dc6 100644 --- a/src/cli/grep.rs +++ b/src/cli/grep.rs @@ -12,18 +12,19 @@ pub fn grep_fastq( out: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let fq_reader = file_reader(fq).map(fastq::Reader::new)?; if let Some(file) = fq { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - info!("reading reads id from file: {}", list); - let start = Instant::now(); let mut num = 0usize; let mut ids = vec![]; - let fp_id = file_reader(Some(list))?; + info!("reading reads id from file: {}", list); for id in fp_id.lines().map_while(std::io::Result::ok) { ids.push(id); } @@ -31,14 +32,13 @@ pub fn grep_fastq( error!("no reads id in file: {}", list); std::process::exit(1); } + if let Some(out) = out { info!("reads write to file: {}", out); } else { info!("reads write to stdout"); } - let mut fo = file_writer(out, compression_level).map(fastq::Writer::new)?; - let fq_reader = file_reader(fq).map(fastq::Reader::new)?; for rec in fq_reader.records().flatten() { let name = if full_name { if let Some(desc) = rec.desc() { diff --git a/src/cli/length.rs b/src/cli/length.rs index b764d8f..6d46d5b 100644 --- a/src/cli/length.rs +++ b/src/cli/length.rs @@ -10,17 +10,18 @@ pub fn fq_length( out: Option<&String>, compression_level: u32, ) -> Result<(), Error> { + let start = Instant::now(); + + let mut reads_len = HashMap::new(); + let mut total = 0usize; + let fp_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(file) = file { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - let start = Instant::now(); - let mut reads_len = HashMap::new(); - let mut total = 0usize; - let fp_reader = file_reader(file).map(fastq::Reader::new)?; + let mut fo = file_writer(out, compression_level)?; - for rec in fp_reader.records().flatten() { let rlen = rec.seq().len(); *reads_len.entry(rlen).or_insert(0usize) += 1; @@ -30,7 +31,6 @@ pub fn fq_length( let mut sort_len: Vec<(&usize, &usize)> = reads_len.iter().collect(); sort_len.sort_by_key(|x| x.0); - //fo.write("lenth\tcount\n".as_bytes())?; fo.write_all("lenth\tcount\n".as_bytes())?; for (k, v) in sort_len.iter() { fo.write_all(format!("{}\t{}\n", k, v).as_bytes())?; diff --git a/src/cli/mask.rs b/src/cli/mask.rs index dc45029..69d04c6 100644 --- a/src/cli/mask.rs +++ b/src/cli/mask.rs @@ -12,6 +12,10 @@ pub fn mask_fastq( out: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let (mut mask_base, mut mask_read) = (0, 0); + let fp_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(file) = file { info!("reading from file: {}", file); } else { @@ -19,12 +23,8 @@ pub fn mask_fastq( } info!("low quality value: {}", qual_limit); info!("mask low quality bases with: {}", nt); - let start = Instant::now(); - - let (mut mask_base, mut mask_read) = (0, 0); - let fp_reader = file_reader(file).map(fastq::Reader::new)?; + let mut fp_writer = file_writer(out, compression_level).map(fastq::Writer::new)?; - for rec in fp_reader.records().flatten() { let score_min = rec.qual().iter().min().unwrap() - phred; if score_min > qual_limit { diff --git a/src/cli/merge.rs b/src/cli/merge.rs index 260ae81..c8dce63 100644 --- a/src/cli/merge.rs +++ b/src/cli/merge.rs @@ -10,15 +10,15 @@ pub fn interleaved( out: Option<&String>, compression_level: u32, ) -> Result<(), Error> { - info!("reading from file: {}", file1); - info!("reading from file: {}", file2); let start = Instant::now(); let mut num = 0usize; let fq1_reader = fastq::Reader::new(file_reader(Some(file1))?); let fq2_reader = fastq::Reader::new(file_reader(Some(file2))?); + info!("reading from file: {}", file1); + info!("reading from file: {}", file2); + let mut fq_writer = fastq::Writer::new(file_writer(out, compression_level)?); - for (rec1, rec2) in fq1_reader .records() .flatten() diff --git a/src/cli/range.rs b/src/cli/range.rs index b42a949..202cae7 100644 --- a/src/cli/range.rs +++ b/src/cli/range.rs @@ -11,6 +11,9 @@ pub fn range_fastq( output: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let fp_reader = file_reader(input).map(fastq::Reader::new)?; if let Some(file) = input { info!("reading from file: {}", file); } else { @@ -18,11 +21,8 @@ pub fn range_fastq( } info!("skip first {} records", skip); info!("get {} records", take); - let start = Instant::now(); - - let fp_reader = file_reader(input).map(fastq::Reader::new)?; + let mut fp_writer = file_writer(output, compression_level).map(fastq::Writer::new)?; - for rec in fp_reader.records().skip(skip).take(take).flatten() { fp_writer.write_record(&rec)?; } diff --git a/src/cli/remove.rs b/src/cli/remove.rs index 3e35bab..d963ae6 100644 --- a/src/cli/remove.rs +++ b/src/cli/remove.rs @@ -13,31 +13,31 @@ pub fn remove_read( rm: bool, compression_level: u32, ) -> Result<(), Error> { - if let Some(file) = file { - info!("reading reads from file: {}", file); - } else { - info!("reading reads from stdin"); - } - info!("reading reads id form file: {}", name); - if !rm { - info!("removed reads in file: {}", save); - } let start = Instant::now(); let mut ids = vec![]; let list = file_reader(Some(name))?; + info!("reading reads id form file: {}", name); for i in list.lines().map_while(Result::ok) { ids.push(i); } - if ids.is_empty() { error!("reads id list is empty"); std::process::exit(1); } let fq_reader = fastq::Reader::new(file_reader(file)?); + if let Some(file) = file { + info!("reading reads from file: {}", file); + } else { + info!("reading reads from stdin"); + } + + if !rm { + info!("removed reads in file: {}", save); + } + let mut fq_writer = fastq::Writer::new(file_writer(out, compression_level)?); - if rm { for rec in fq_reader.records().flatten() { if !ids.contains(&rec.id().to_string()) { diff --git a/src/cli/rename.rs b/src/cli/rename.rs index 9d623bf..d24f253 100644 --- a/src/cli/rename.rs +++ b/src/cli/rename.rs @@ -14,13 +14,14 @@ pub fn rename_fastq( compression_level: u32, ) -> Result<()> { let start = Instant::now(); + + let fp = fastq::Reader::new(file_reader(input)?); if let Some(file) = input { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - let fp = fastq::Reader::new(file_reader(input)?); let mut fo = fastq::Writer::new(file_writer(output, compression_level)?); let mut n: usize = 0; diff --git a/src/cli/reverse.rs b/src/cli/reverse.rs index c0cb130..e48eebb 100644 --- a/src/cli/reverse.rs +++ b/src/cli/reverse.rs @@ -12,6 +12,8 @@ pub fn reverse_comp_seq( compression_level: u32, ) -> Result<()> { let start = Instant::now(); + + let fq_reader = file_reader(input).map(fastq::Reader::new)?; if let Some(file) = input { info!("reading from file: {}", file); } else { @@ -25,7 +27,6 @@ pub fn reverse_comp_seq( (b'C', b'G'), (b'N', b'N'), ]); - let fq_reader = file_reader(input).map(fastq::Reader::new)?; let mut out_writer = file_writer(out, compression_level).map(fastq::Writer::new)?; for rec in fq_reader.records().flatten() { diff --git a/src/cli/search.rs b/src/cli/search.rs index 60925e5..64b8a97 100644 --- a/src/cli/search.rs +++ b/src/cli/search.rs @@ -16,22 +16,20 @@ pub fn search_fq( compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); + + let fq_reader = file_reader(fq).map(fastq::Reader::new).unwrap(); if let Some(file) = fq { info!("reading from file: {}", file); } else { info!("reading from stdin"); } + info!("regex pattern is: {}", pat); if ncpu == 1 || ncpu == 0 { info!("thread num is: {}", ncpu); } else { info!("additional thread num is: {}", ncpu); } - if let Some(out) = out { - info!("reads write to file: {}", out); - } else { - info!("reads write to stdout"); - } let mut chunk = chunk; if chunk == 0 { @@ -43,7 +41,12 @@ pub fn search_fq( } let mut num = 0usize; let mut fo = file_writer(out, compression_level).map(fastq::Writer::new)?; - let fq_reader = file_reader(fq).map(fastq::Reader::new).unwrap(); + if let Some(out) = out { + info!("reads write to file: {}", out); + } else { + info!("reads write to stdout"); + } + if ncpu == 1 || ncpu == 0 { let re = RegexBuilder::new(pat) diff --git a/src/cli/select.rs b/src/cli/select.rs index 0aab168..f8742c4 100644 --- a/src/cli/select.rs +++ b/src/cli/select.rs @@ -11,16 +11,14 @@ pub fn select_pe_fastq( out_r2: &String, compression_level: u32, ) -> Result<()> { - info!("read forward reads from file: {}", fq1); - info!("read reverse reads from file: {}", fq2); - info!("output selected read1 file: {}", out_r1); - info!("output selected read2 file: {}", out_r2); let start = Instant::now(); let mut read1_id = HashMap::new(); - let mut read2_id = HashMap::new(); + let mut read2_id = HashMap::new(); let fq_reader1 = file_reader(Some(fq1)).map(fastq::Reader::new)?; let fq_reader2 = file_reader(Some(fq2)).map(fastq::Reader::new)?; + info!("read forward reads from file: {}", fq1); + info!("read reverse reads from file: {}", fq2); for rec in fq_reader1.records().flatten() { let k = rec.id().to_owned(); @@ -30,7 +28,8 @@ pub fn select_pe_fastq( let k = rec.id().to_owned(); read2_id.entry(k).or_insert(()); } - + info!("output selected read1 file: {}", out_r1); + info!("output selected read2 file: {}", out_r2); let mut out_writer1 = file_writer(Some(out_r1), compression_level).map(fastq::Writer::new)?; let mut out_writer2 = file_writer(Some(out_r2), compression_level).map(fastq::Writer::new)?; let (mut pe_r1, mut pe_r2) = (0usize, 0usize); diff --git a/src/cli/shuffle.rs b/src/cli/shuffle.rs index 83b5a22..6bef4f0 100644 --- a/src/cli/shuffle.rs +++ b/src/cli/shuffle.rs @@ -12,17 +12,17 @@ pub fn shuffle_fastq( out: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let mut rng = Pcg64::seed_from_u64(seed); + let fq_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(file) = file { info!("reading from file: {}", file); } else { info!("reading from stdin"); } info!("rand seed: {}", seed); - let start = Instant::now(); - - let mut rng = Pcg64::seed_from_u64(seed); - let fq_reader = file_reader(file).map(fastq::Reader::new)?; - + let mut vec_reads = vec![]; for rec in fq_reader.records().flatten() { vec_reads.push(rec); diff --git a/src/cli/size.rs b/src/cli/size.rs index ae3025d..37c5ba0 100644 --- a/src/cli/size.rs +++ b/src/cli/size.rs @@ -36,18 +36,15 @@ pub fn size_fastq( compression_level: u32, ) -> Result<()> { let start = Instant::now(); + + let fq_reader = fastq::Reader::new(file_reader(fq)?); if let Some(inp) = fq { info!("reading from file: {}", inp); } else { info!("reading from stdin"); } info!("additional worker threads is: {}", ncpu); - if let Some(file) = out { - info!("output result write to file: {}", file); - } else { - info!("output result write to stdout"); - } - + let mut chunk = chunk; if chunk == 0 { warn!( @@ -56,13 +53,19 @@ pub fn size_fastq( ); chunk = 5000; } - let fq = fastq::Reader::new(file_reader(fq)?); + + if let Some(file) = out { + info!("output result write to file: {}", file); + } else { + info!("output result write to stdout"); + } let mut fo = file_writer(out, compression_level)?; + let mut base = Base::new(); let mut bases = 0usize; if ncpu == 0 || ncpu == 1 { - for rec in fq.records().flatten() { + for rec in fq_reader.records().flatten() { base.read += 1; for nt in rec.seq().iter() { match *nt { @@ -78,7 +81,7 @@ pub fn size_fastq( bases = base.a + base.t + base.g + base.c + base.n; } else { let (tx, rx) = unbounded(); - let mut fqiter = fq.records(); + let mut fqiter = fq_reader.records(); loop { let chunk: Vec<_> = fqiter.by_ref().take(chunk).flatten().collect(); if chunk.is_empty() { diff --git a/src/cli/slide.rs b/src/cli/slide.rs index 2ca1e81..653157d 100644 --- a/src/cli/slide.rs +++ b/src/cli/slide.rs @@ -12,6 +12,10 @@ pub fn slide_fastq( suffix: &str, compression_level: u32, ) -> Result<()> { + + let start = Instant::now(); + + let fq_reader = file_reader(file).map(fastq::Reader::new)?; if let Some(file) = file { info!("reading from file: {}", file); } else { @@ -19,9 +23,6 @@ pub fn slide_fastq( } info!("window size : {}", wind); info!("step size: {}", step); - let start = Instant::now(); - - let fq_reader = file_reader(file).map(fastq::Reader::new)?; let mut fq_writer = file_writer(out, compression_level).map(fastq::Writer::new)?; let mut window = wind; for rec in fq_reader.records().flatten() { diff --git a/src/cli/sort.rs b/src/cli/sort.rs index efeee3f..249e4b9 100644 --- a/src/cli/sort.rs +++ b/src/cli/sort.rs @@ -14,14 +14,8 @@ pub fn sort_fastq( out: Option<&String>, compression_level: u32, ) -> Result<(), Error> { - if let Some(file) = file { - info!("reading from file: {}", file); - } else { - info!("reading from stdin"); - } - if reverse { - info!("output reversed result"); - } + let start = Instant::now(); + let mut n = 0; if sort_by_gc { n += 1; @@ -43,9 +37,17 @@ pub fn sort_fastq( error!("please specifiy one of the flags: -l, -n, -g, -s"); std::process::exit(1); } - let start = Instant::now(); let fq_reader = file_reader(file).map(fastq::Reader::new)?; + if let Some(file) = file { + info!("reading from file: {}", file); + } else { + info!("reading from stdin"); + } + if reverse { + info!("output reversed result"); + } + let mut vec_reads = vec![]; for rec in fq_reader.records().flatten() { vec_reads.push(rec); diff --git a/src/cli/split.rs b/src/cli/split.rs index 361bb96..d664a52 100644 --- a/src/cli/split.rs +++ b/src/cli/split.rs @@ -2,32 +2,67 @@ use crate::utils::*; use anyhow::{Error, Ok}; use bio::io::fastq; use log::*; -use std::time::Instant; +use std::{path::PathBuf, time::Instant}; pub fn split_interleaved( file: Option<&String>, out_dir: &String, out_pre: &String, + gzip: bool, + bzip2: bool, + xz: bool, compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); - - let pre1 = format!("{}/{}_r1.fq.gz", out_dir, out_pre); - let pre2 = format!("{}/{}_r2.fq.gz", out_dir, out_pre); - let mut fh1 = fastq::Writer::new(file_writer_append(&pre1, compression_level)?); - let mut fh2 = fastq::Writer::new(file_writer_append(&pre2, compression_level)?); - + + let mut n = 0; + if gzip { + n += 1; + } + if bzip2 { + n += 1; + } + if xz { + n += 1; + } + if n > 1 { + error!("only one of the flags --gzip , --xz and --bzip2 is allowed"); + std::process::exit(1); + } + let fq_reader = fastq::Reader::new(file_reader(file)?); if let Some(file) = file { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - info!("read1 output file: {}", pre1); - info!("read2 output file: {}", pre2); + + //let pre1 = format!("{}/{}_r1.fq.gz", out_dir, out_pre); + let pre1 = if gzip { + PathBuf::from(out_dir).join(format!("{}_r1.fq.gz",out_pre)) + } else if bzip2 { + PathBuf::from(out_dir).join(format!("{}_r1.fq.bz2",out_pre)) + } else if xz { + PathBuf::from(out_dir).join(format!("{}_r1.fq.xz",out_pre)) + } else { + PathBuf::from(out_dir).join(format!("{}_r1.fq",out_pre)) + }; + //let pre2 = format!("{}/{}_r2.fq.gz", out_dir, out_pre); + let pre2 = if gzip { + PathBuf::from(out_dir).join(format!("{}_r2.fq.gz",out_pre)) + } else if bzip2 { + PathBuf::from(out_dir).join(format!("{}_r2.fq.bz2",out_pre)) + } else if xz { + PathBuf::from(out_dir).join(format!("{}_r2.fq.xz",out_pre)) + } else { + PathBuf::from(out_dir).join(format!("{}_r2.fq",out_pre)) + }; + let mut fh1 = fastq::Writer::new(file_writer_append(&pre1, compression_level)?); + info!("read1 output file: {:?}", pre1); + let mut fh2 = fastq::Writer::new(file_writer_append(&pre2, compression_level)?); + info!("read2 output file: {:?}", pre2); let mut num = 0usize; let mut flag = true; - let fq_reader = fastq::Reader::new(file_reader(file)?); for rec in fq_reader.records().flatten() { num += 1; if flag { diff --git a/src/cli/split2.rs b/src/cli/split2.rs index 3fd15c2..0b69781 100644 --- a/src/cli/split2.rs +++ b/src/cli/split2.rs @@ -2,7 +2,7 @@ use crate::utils::*; use anyhow::{Error, Ok}; use bio::io::fastq; use log::*; -use std::time::Instant; +use std::{path::PathBuf, time::Instant}; pub fn split_chunk( file: Option<&String>, @@ -15,11 +15,7 @@ pub fn split_chunk( compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); - if let Some(file) = file { - info!("reading from file: {}", file); - } else { - info!("reading from stdin"); - } + let mut n = 0; if gzip { n += 1; @@ -37,22 +33,31 @@ pub fn split_chunk( let (mut flag, mut index) = (0usize, 0usize); let out = if gzip { - format!("{}/{}{}.fastq.gz", out_dir, out_pre, index) + //format!("{}/{}{}.fastq.gz", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq.gz",out_pre,index)) } else if bzip2 { - format!("{}/{}{}.fastq.bz2", out_dir, out_pre, index) + //format!("{}/{}{}.fastq.bz2", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq.bz2",out_pre,index)) } else if xz { - format!("{}/{}{}.fastq.xz", out_dir, out_pre, index) + //format!("{}/{}{}.fastq.xz", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq.xz",out_pre,index)) } else { - format!("{}/{}{}.fastq", out_dir, out_pre, index) + //format!("{}/{}{}.fastq", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq",out_pre,index)) }; let fq_reader = fastq::Reader::new(file_reader(file)?); + if let Some(file) = file { + info!("reading from file: {}", file); + } else { + info!("reading from stdin"); + } let mut fh = vec![fastq::Writer::new(file_writer( Some(&out), compression_level, )?)]; - info!("start to write file: {}", out); + info!("start to write file: {:?}", out); for rec in fq_reader.records().flatten() { if flag < num { let fhthis = fh.get_mut(index).unwrap(); @@ -61,13 +66,13 @@ pub fn split_chunk( } else { index += 1; let out = if gzip { - format!("{}/{}{}.fastq.gz", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq.gz",out_pre,index)) } else if bzip2 { - format!("{}/{}{}.fastq.bz2", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq.bz2",out_pre,index)) } else if xz { - format!("{}/{}{}.fastq.xz", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq.xz",out_pre,index)) } else { - format!("{}/{}{}.fastq", out_dir, out_pre, index) + PathBuf::from(out_dir).join(format!("{}{}.fq",out_pre,index)) }; fh.push(fastq::Writer::new(file_writer( Some(&out), @@ -75,7 +80,7 @@ pub fn split_chunk( )?)); let fhthis = fh.get_mut(index).unwrap(); - info!("start to write file: {}", out); + info!("start to write file: {:?}", out); fhthis.write(rec.id(), rec.desc(), rec.seq(), rec.qual())?; flag = 1; // already write one record in this loop, flag add one } diff --git a/src/cli/stats.rs b/src/cli/stats.rs index fdbd555..f583f80 100644 --- a/src/cli/stats.rs +++ b/src/cli/stats.rs @@ -91,11 +91,13 @@ pub fn stat_fq( phred: u8, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); if ![33u8, 64u8].contains(&phred) { error!("invalid phred value"); std::process::exit(1); } - let start = Instant::now(); + + let fq = fastq::Reader::new(file_reader(inp)?); if let Some(inp) = inp { info!("reading from file: {}", inp); } else { @@ -108,7 +110,7 @@ pub fn stat_fq( info!("cycle result write to stdout"); } - let fq = fastq::Reader::new(file_reader(inp)?); + let mut fo = file_writer(Some(pre_sum), compression_level)?; let mut fc = file_writer(pre_cyc, compression_level)?; diff --git a/src/cli/subfq.rs b/src/cli/subfq.rs index 52b4330..c13a909 100644 --- a/src/cli/subfq.rs +++ b/src/cli/subfq.rs @@ -14,6 +14,12 @@ fn select_fastq( out: Option<&String>, compression_level: u32, ) -> Result<(), Error> { + let start = Instant::now(); + + let mut rng = Pcg64::seed_from_u64(seed); + let mut get: Vec = Vec::with_capacity(n); + + let fq_reader = fastq::Reader::new(file_reader(file)?); if let Some(file) = file { info!("reading from file: {}", file); } else { @@ -22,12 +28,7 @@ fn select_fastq( info!("rand seed: {}", seed); info!("subseq number: {}", n); info!("reduce much memory but cost more time"); - let start = Instant::now(); - - let mut rng = Pcg64::seed_from_u64(seed); - let mut get: Vec = Vec::with_capacity(n); - - let fq_reader = fastq::Reader::new(file_reader(file)?); + for (order, _) in fq_reader.records().flatten().enumerate() { if order < n { get.push(order); diff --git a/src/cli/tail.rs b/src/cli/tail.rs index d4f5f0a..412b850 100644 --- a/src/cli/tail.rs +++ b/src/cli/tail.rs @@ -10,15 +10,16 @@ pub fn tail_n_records( output: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let fp = fastq::Reader::new(file_reader(input)?); if let Some(file) = input { info!("reading from file: {}", file); } else { info!("reading from stdin"); } info!("get tail {} records", number); - let start = Instant::now(); - - let fp = fastq::Reader::new(file_reader(input)?); + let mut fo = fastq::Writer::new(file_writer(output, compression_level)?); let mut total = 0usize; diff --git a/src/cli/top.rs b/src/cli/top.rs index 4a07c4c..1e52c1e 100644 --- a/src/cli/top.rs +++ b/src/cli/top.rs @@ -10,15 +10,16 @@ pub fn top_n_records( output: Option<&String>, compression_level: u32, ) -> Result<()> { + let start = Instant::now(); + + let fp = fastq::Reader::new(file_reader(input)?); if let Some(file) = input { info!("reading from file: {}", file); } else { info!("reading from stdin"); } info!("get top {} records", number); - let start = Instant::now(); - - let fp = fastq::Reader::new(file_reader(input)?); + let mut fo = fastq::Writer::new(file_writer(output, compression_level)?); for rec in fp.records().take(number).flatten() { fo.write_record(&rec)?; diff --git a/src/cli/trimfq.rs b/src/cli/trimfq.rs index 9487279..07c9565 100644 --- a/src/cli/trimfq.rs +++ b/src/cli/trimfq.rs @@ -13,16 +13,16 @@ pub fn trim_fq( compression_level: u32, ) -> Result<()> { let start = Instant::now(); + + let length = right + left; + let fq_reader = fastq::Reader::new(file_reader(file)?); if let Some(file) = file { info!("reading from file: {}", file); } else { info!("reading from stdin"); } - let length = right + left; - let fq_reader = fastq::Reader::new(file_reader(file)?); let mut fq_writer = fastq::Writer::new(file_writer(out, compression_level)?); - for (idx, rec) in fq_reader.records().flatten().enumerate() { let rlen = rec.seq().len(); if left >= rlen || right >= rlen || length >= rlen { diff --git a/src/command.rs b/src/command.rs index 7e76737..d11ebd8 100644 --- a/src/command.rs +++ b/src/command.rs @@ -544,6 +544,15 @@ pub enum Subcli { /// input fastq file, or read from stdin //#[arg(short = 'i', long = "input" ,value_name = "STR")] input: Option, + /// if specified, output gzip compressed file + #[arg(short = 'z', long = "gzip", help_heading = Some("FLAGS"))] + gzip: bool, + /// if specified, output bzip2 compressed file + #[arg(short = 'Z', long = "bzip2", help_heading = Some("FLAGS"))] + bzip2: bool, + /// if specified, output xz compressed file + #[arg(short = 'x', long = "xz", help_heading = Some("FLAGS"))] + xz: bool, /// output fastq file prefix name #[arg(short = 'p', long = "prefix" , default_value_t = String::from("demo"), value_name = "STR")] pre: String, diff --git a/src/main.rs b/src/main.rs index 4f30d6f..bf886a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -361,8 +361,8 @@ fn main() -> Result<(), Error> { Subcli::reverse { input, rev, out } => { reverse_comp_seq(input.as_ref(), out.as_ref(), rev, arg.compression_level)?; } - Subcli::split { input, pre, out } => { - split_interleaved(input.as_ref(), &out, &pre, arg.compression_level)?; + Subcli::split { input, gzip, bzip2, xz, pre, out } => { + split_interleaved(input.as_ref(), &out, &pre, gzip, bzip2, xz, arg.compression_level)?; } Subcli::merge { read1, read2, out } => { interleaved(&read1, &read2, out.as_ref(), arg.compression_level)?; diff --git a/src/utils.rs b/src/utils.rs index 7167ac9..beb4686 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,5 @@ use anyhow::{Error, Ok, Result}; +use log::*; use std::{ fs::{File, OpenOptions}, io::{self, prelude::*, BufRead, BufReader, BufWriter, Write}, @@ -74,6 +75,10 @@ where Ok(Box::new(BufReader::with_capacity(BUFF_SIZE, fp))) } } else { + if atty::is(atty::Stream::Stdin) { + error!("stdin not detected"); + std::process::exit(1); + } let fp = BufReader::new(io::stdin()); Ok(Box::new(fp)) }