Skip to content

Commit

Permalink
add subcli kmer
Browse files Browse the repository at this point in the history
  • Loading branch information
sharkLoc committed Jun 14, 2024
1 parent a34b8fb commit 691e242
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 5 deletions.
12 changes: 11 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fqkit"
version = "0.4.8"
version = "0.4.9"
edition = "2021"
authors = ["sharkLoc <[email protected]>"]
rust-version = "1.77.2"
Expand Down Expand Up @@ -28,6 +28,7 @@ env_logger = "0.10.0"
flate2 = "1.0.24"
log = "0.4.20"
lowcharts = "0.5.8"
nthash = "0.5.1"
plotters = "0.3.4"
rand = "0.8.5"
rand_pcg = "0.3.1"
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ cargo install --git https://github.com/sharkLoc/fqkit.git
```bash
FqKit -- A simple and cross-platform program for fastq file manipulation

Version: 0.4.8
Version: 0.4.9

Authors: sharkLoc <[email protected]>
Source code: https://github.com/sharkLoc/fqkit.git
Expand Down Expand Up @@ -72,6 +72,7 @@ Commands:
search search reads/motifs from fastq file
grep grep fastq sequence by read id or full name
stats summary for fastq format file [aliases: stat]
kmer sample kmer count
shuffle shuffle fastq sequences
size report the number sequences and bases
slide extract subsequences in sliding windows
Expand Down
56 changes: 56 additions & 0 deletions src/cli/kmer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use anyhow::Error;
use bio::io::fastq;
use log::*;
use std::{collections::HashMap, time::Instant};
use crate::utils::*;
use nthash::nthash;


pub fn kmer_count(
input: Option<&String>,
kmer_len: usize,
header: bool,
output: Option<&String>,
compression_level: u32,
) -> Result<(),Error> {
let start = Instant::now();
let reader = file_reader(input).map(fastq::Reader::new)?;
if let Some(file) = input {
info!("reading from file: {}", file);
} else {
info!("reading from stdin");
}

let mut writer = file_writer(output, compression_level)?;
let mut kmers = HashMap::new();
let (mut sidx, mut eidx) = (0,kmer_len);

for rec in reader.records().flatten() {
let khash = nthash(rec.seq(), kmer_len);

let end = rec.seq().len() - kmer_len + 1;
while eidx <= end {
let kseq = &rec.seq()[sidx..eidx];
let khash_this = nthash(kseq, kmer_len)[0];
for k in khash.iter() {
if khash_this.eq(k) {
*kmers.entry(kseq.to_owned()).or_insert(0_u64) += 1;
}
}

sidx += 1;
eidx += 1;
}
}

if header {
writer.write_all("kmer\tcount\n".as_bytes())?;
}
for (k,v) in kmers {
writer.write_all(k.as_slice())?;
writer.write_all(format!("\t{}\n",v).as_bytes())?;
}
writer.flush()?;
info!("time elapsed is: {:?}", start.elapsed());
Ok(())
}
1 change: 1 addition & 0 deletions src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ pub mod tail;
pub mod top;
pub mod trimfq;
pub mod view;
pub mod kmer;
16 changes: 15 additions & 1 deletion src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use clap::{value_parser, ArgAction, Parser};
#[command(
name = "FqKit",
author = "sharkLoc",
version = "0.4.8",
version = "0.4.9",
about = "A simple and cross-platform program for fastq file manipulation",
long_about = None,
next_line_help = false,
Expand Down Expand Up @@ -287,6 +287,20 @@ pub enum Subcli {
#[arg(short = 'c', long = "cycle", value_name = "FILE")]
cyc: Option<String>,
},
/// sample kmer count
kmer {
/// input fastq file, or read from stdin
input: Option<String>,
/// set kmer size
#[arg(short = 'k', long = "kmer-size", default_value_t = 21, value_name = "INT")]
size: usize,
/// add header info in output file
#[arg(short = 'H', long, help_heading = Some("FLAGS"))]
header: bool,
/// output file name or write to stdout, file ending in .gz/.bz2/.xz will be compressed automatically
#[arg(short = 'o', long = "out", value_name = "FILE")]
out: Option<String>,
},
/// shuffle fastq sequences
#[command(before_help = "note: all records will be readed into memory")]
shuffle {
Expand Down
5 changes: 4 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use cli::{
barcode::*, check::*, concat::*, cutadapter::cut_adapter, filter::*, flatten::*, fq2fa::*,
fq2sam::*, fqscore::*, gcplot::*, grep::*, length::*, mask::*, merge::*, plot::*, range::*,
remove::*, rename::*, reverse::*, search::*, select::*, shuffle::*, size::*, slide::*, sort::*,
split::*, split2::*, stats::*, subfq::*, tail::*, top::*, trimfq::*, view::*,
split::*, split2::*, stats::*, subfq::*, tail::*, top::*, trimfq::*, view::*, kmer::*,
};

fn main() -> Result<(), Error> {
Expand Down Expand Up @@ -470,6 +470,9 @@ fn main() -> Result<(), Error> {
Subcli::view { input, out } => {
view_fq(input.as_ref(), out.as_ref(), arg.compression_level)?;
}
Subcli::kmer { input, size, header, out } => {
kmer_count(input.as_ref(), size, header, out.as_ref(), arg.compression_level)?;
}
}

Ok(())
Expand Down

0 comments on commit 691e242

Please sign in to comment.