Skip to content

Commit

Permalink
update to version 0.2.6
Browse files Browse the repository at this point in the history
  • Loading branch information
sharkLoc committed Sep 19, 2023
1 parent 292d5d1 commit f79a5c5
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fqkit"
version = "0.2.5"
version = "0.2.6"
edition = "2021"
authors = ["sharkLoc <[email protected]>"]
rust-version = "1.65.0"
Expand Down
Binary file added example/mini2k.fq.gz
Binary file not shown.
143 changes: 143 additions & 0 deletions src/gcplot.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
use anyhow::{Ok, Error};
use bio::io::fastq;
use std::collections::HashMap;
use log::*;
use plotters::prelude::*;
use crate::utils::*;
use std::time::Instant;

pub fn gc_content(
fqin: &Option<&str>,
output: &Option<&str>,
prefix: String,
width: usize,
height: usize,
ylim: usize,
types: &str,
) -> Result<(),Error> {
if let Some(inp) = fqin {
info!("reading from file: {}", inp);
} else {
info!("reading from stdin");
}
let start = Instant::now();

let fq_reader = file_reader(fqin).map(fastq::Reader::new)?;
let mut fo = file_writer(output)?;
let mut df_hash = HashMap::new() ;

for rec in fq_reader.records().flatten() {
let gc_count = rec.seq().iter().filter(|x| *x ==&b'G'|| *x==&b'C').count();
let gc_ratio = (gc_count as f64 / rec.seq().len() as f64 *100.0).round() as u64;
*df_hash.entry(gc_ratio).or_insert(0) +=1usize;

}

let mut df_ret = vec![];
fo.write(format!("gc_content(%)\tratio(%)\n").as_bytes())?;
let total = df_hash.values().sum::<usize>() as f32;
for i in 0..=100 {
let v = (*df_hash.get(&i).unwrap_or(&0) as f32 *10000.0 / total ).round() / 100.0;
df_ret.push(v);
fo.write(format!("{}\t{}\n",i,v).as_bytes())?;
}

plot_gc(df_ret, prefix, width, height, ylim, types)?;

info!("time elapsed is: {:?}",start.elapsed());
Ok(())
}

// GC content line plot
fn plot_gc(
data: Vec<f32>,
prefix: String,
width: usize,
height: usize,
ylim: usize,
types: &str
) -> Result<(),Error> {
if !["svg", "png"].contains(&types) {
error!("invalid args types.");
std::process::exit(1);
}
if ylim > 100 {
error!("invalid args ylim.");
std::process::exit(1);
}
let name = if types == "png" {format!("{}.png",prefix)} else {format!("{}.svg",prefix)};
info!("output gc content plot: {}",name);

if types == "png" {
let png = BitMapBackend::new(&name, (width as u32, height as u32)).into_drawing_area();
png.fill(&WHITE)?;

let mut charts = ChartBuilder::on(&png)
.margin(10)
.caption("GC distrbution plot", ("sans-serif", 30).into_font())
.x_label_area_size(40)
.y_label_area_size(40)
//.build_cartesian_2d(0..100, 0..ylim)?;
.build_cartesian_2d(0.0..100f32 , 0.0..ylim as f32)?;

charts
.configure_mesh()
.x_labels(20)
.x_desc("GC CONTENT")
.x_label_formatter(&|x| format!("{:.0}%",x))
.y_labels(20)
.y_label_formatter(&|x| format!("{:.0}%", x))
.y_desc("percent")
.draw()?;

charts.draw_series(
AreaSeries::new((0..).zip(data.iter()).map(|(x,y)| (x as f32, *y)), 0., &GREEN.mix(0.5)).border_style(&GREEN),
)?
.label("GC content")
.legend(|(x, y)| PathElement::new(vec![(x, y), (x + 20, y)], &GREEN));

//charts.draw_series( LineSeries::new((0..).zip(data.iter()).map(|(x,y)| (x as f32, *y)), &BLACK) )?;

charts
.configure_series_labels()
.background_style(&WHITE.mix(0.9))
.border_style(&BLACK)
.position(SeriesLabelPosition::UpperRight)
.draw()?;

} else {
let svg = SVGBackend::new(&name, (width as u32, height as u32)).into_drawing_area();
svg.fill(&WHITE)?;

let mut charts = ChartBuilder::on(&svg)
.margin(10)
.caption("GC distrbution plot", ("sans-serif", 30).into_font())
.x_label_area_size(40)
.y_label_area_size(40)
.build_cartesian_2d(0.0..100f32 , 0.0..ylim as f32)?;

charts
.configure_mesh()
.x_labels(20)
.x_desc("GC CONTENT")
.x_label_formatter(&|x| format!("{:.0}%",x))
.y_labels(20)
.y_label_formatter(&|x| format!("{:.0}%", x))
.y_desc("percent")
.draw()?;

charts.draw_series(
AreaSeries::new((0..).zip(data.iter()).map(|(x,y)| (x as f32, *y)), 0., &GREEN.mix(0.5)).border_style(&GREEN),
)?
.label("GC content")
.legend(|(x, y)| PathElement::new(vec![(x, y), (x + 20, y)], &GREEN));

charts
.configure_series_labels()
.background_style(&WHITE.mix(0.9))
.border_style(&BLACK)
.position(SeriesLabelPosition::UpperRight)
.draw()?;
}
Ok(())
}
44 changes: 42 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use env_logger::{Builder,fmt::Color};
use log::{error, warn, info, LevelFilter,Level};
use std::io::Write;

mod gcplot;
use gcplot::*;
mod split;
use split::*;
mod search;
Expand All @@ -31,7 +33,7 @@ mod utils;
#[derive(Parser, Debug)]
#[command(
author = "size_t",
version = "version 0.2.5",
version = "version 0.2.6",
about = "fqkit: a simple program for fastq file manipulation",
long_about = None,
next_line_help = true
Expand Down Expand Up @@ -188,6 +190,29 @@ enum Subcli {
/// output interleaved fastq file name.
#[arg(short = 'o', long = "out", default_value_t = String::from("interleaved.fq.gz"))]
out: String,
},
/// get GC content result and plot
gcplot {
/// input fastq[.gz] file, or read from stdin
input: Option<String>,
/// output GC contnet result file name
#[arg(short = 'o', long = "out")]
output: Option<String>,
/// output base figure prefix name
#[arg(short='p', long="prefix", default_value_t=String::from("gc_plot"))]
prefix: String,
/// set output figure width
#[arg(short = 'W', long = "width", default_value_t = 960)]
width: usize,
/// set output figure height
#[arg(short = 'H', long = "height", default_value_t = 540)]
height: usize,
/// set max ylim (0~100)
#[arg(short = 'y', long = "ylim", default_value_t = 15)]
ylim: usize,
/// figure type 'png' or 'svg'
#[arg(short='t', long="types", default_value_t=String::from("png"))]
types: String,
}
}

Expand Down Expand Up @@ -371,7 +396,22 @@ fn main() -> Result<(), Error> {
out,
} => {
interleaved(&Some(read1.as_str()), &Some(read2.as_str()), &Some(out.as_str()))?;
}
}
Subcli::gcplot { input, output, prefix, width, height, ylim, types } => {
if let Some(input) = input {
if let Some(output) = output {
gc_content(&Some(&input), &Some(&output), prefix, width, height, ylim, &types)?;
} else {
gc_content(&Some(&input),&None, prefix, width, height, ylim, &types)?;
}
} else {
if let Some(output) = output {
gc_content(&None,&Some(&output), prefix, width, height, ylim, &types)?;
} else {
gc_content(&None,&None, prefix, width, height, ylim, &types)?;
}
}
}
}

Ok(())
Expand Down

0 comments on commit f79a5c5

Please sign in to comment.