Skip to content

Commit

Permalink
fix kmer count method
Browse files Browse the repository at this point in the history
  • Loading branch information
sharkLoc committed Jun 14, 2024
1 parent 691e242 commit c73d90a
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 22 deletions.
25 changes: 10 additions & 15 deletions src/cli/kmer.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
use crate::utils::*;
use anyhow::Error;
use bio::io::fastq;
use log::*;
use std::{collections::HashMap, time::Instant};
use crate::utils::*;
use nthash::nthash;

use std::{collections::HashMap, time::Instant};

pub fn kmer_count(
input: Option<&String>,
kmer_len: usize,
header: bool,
output: Option<&String>,
compression_level: u32,
) -> Result<(),Error> {
) -> Result<(), Error> {
let start = Instant::now();
let reader = file_reader(input).map(fastq::Reader::new)?;
if let Some(file) = input {
Expand All @@ -23,21 +22,17 @@ pub fn kmer_count(

let mut writer = file_writer(output, compression_level)?;
let mut kmers = HashMap::new();
let (mut sidx, mut eidx) = (0,kmer_len);
let (mut sidx, mut eidx) = (0, kmer_len);

for rec in reader.records().flatten() {
let khash = nthash(rec.seq(), kmer_len);

let end = rec.seq().len() - kmer_len + 1;
while eidx <= end {
while eidx <= rec.seq().len() {
let kseq = &rec.seq()[sidx..eidx];
let khash_this = nthash(kseq, kmer_len)[0];
for k in khash.iter() {
if khash_this.eq(k) {
*kmers.entry(kseq.to_owned()).or_insert(0_u64) += 1;
}
if khash.contains(&khash_this) {
*kmers.entry(kseq.to_owned()).or_insert(0_u64) += 1;
}

sidx += 1;
eidx += 1;
}
Expand All @@ -46,11 +41,11 @@ pub fn kmer_count(
if header {
writer.write_all("kmer\tcount\n".as_bytes())?;
}
for (k,v) in kmers {
for (k, v) in kmers {
writer.write_all(k.as_slice())?;
writer.write_all(format!("\t{}\n",v).as_bytes())?;
writer.write_all(format!("\t{}\n", v).as_bytes())?;
}
writer.flush()?;
info!("time elapsed is: {:?}", start.elapsed());
Ok(())
}
}
2 changes: 1 addition & 1 deletion src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub mod fq2sam;
pub mod fqscore;
pub mod gcplot;
pub mod grep;
pub mod kmer;
pub mod length;
pub mod mask;
pub mod merge;
Expand All @@ -31,4 +32,3 @@ pub mod tail;
pub mod top;
pub mod trimfq;
pub mod view;
pub mod kmer;
7 changes: 6 additions & 1 deletion src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,12 @@ pub enum Subcli {
/// input fastq file, or read from stdin
input: Option<String>,
/// set kmer size
#[arg(short = 'k', long = "kmer-size", default_value_t = 21, value_name = "INT")]
#[arg(
short = 'k',
long = "kmer-size",
default_value_t = 21,
value_name = "INT"
)]
size: usize,
/// add header info in output file
#[arg(short = 'H', long, help_heading = Some("FLAGS"))]
Expand Down
22 changes: 17 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ use command::*;
mod cli;
use cli::{
barcode::*, check::*, concat::*, cutadapter::cut_adapter, filter::*, flatten::*, fq2fa::*,
fq2sam::*, fqscore::*, gcplot::*, grep::*, length::*, mask::*, merge::*, plot::*, range::*,
remove::*, rename::*, reverse::*, search::*, select::*, shuffle::*, size::*, slide::*, sort::*,
split::*, split2::*, stats::*, subfq::*, tail::*, top::*, trimfq::*, view::*, kmer::*,
fq2sam::*, fqscore::*, gcplot::*, grep::*, kmer::*, length::*, mask::*, merge::*, plot::*,
range::*, remove::*, rename::*, reverse::*, search::*, select::*, shuffle::*, size::*,
slide::*, sort::*, split::*, split2::*, stats::*, subfq::*, tail::*, top::*, trimfq::*,
view::*,
};

fn main() -> Result<(), Error> {
Expand Down Expand Up @@ -470,8 +471,19 @@ fn main() -> Result<(), Error> {
Subcli::view { input, out } => {
view_fq(input.as_ref(), out.as_ref(), arg.compression_level)?;
}
Subcli::kmer { input, size, header, out } => {
kmer_count(input.as_ref(), size, header, out.as_ref(), arg.compression_level)?;
Subcli::kmer {
input,
size,
header,
out,
} => {
kmer_count(
input.as_ref(),
size,
header,
out.as_ref(),
arg.compression_level,
)?;
}
}

Expand Down

0 comments on commit c73d90a

Please sign in to comment.