-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
187 additions
and
4 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[package] | ||
name = "fqkit" | ||
version = "0.2.5" | ||
version = "0.2.6" | ||
edition = "2021" | ||
authors = ["sharkLoc <[email protected]>"] | ||
rust-version = "1.65.0" | ||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
use anyhow::{Ok, Error}; | ||
use bio::io::fastq; | ||
use std::collections::HashMap; | ||
use log::*; | ||
use plotters::prelude::*; | ||
use crate::utils::*; | ||
use std::time::Instant; | ||
|
||
pub fn gc_content( | ||
fqin: &Option<&str>, | ||
output: &Option<&str>, | ||
prefix: String, | ||
width: usize, | ||
height: usize, | ||
ylim: usize, | ||
types: &str, | ||
) -> Result<(),Error> { | ||
if let Some(inp) = fqin { | ||
info!("reading from file: {}", inp); | ||
} else { | ||
info!("reading from stdin"); | ||
} | ||
let start = Instant::now(); | ||
|
||
let fq_reader = file_reader(fqin).map(fastq::Reader::new)?; | ||
let mut fo = file_writer(output)?; | ||
let mut df_hash = HashMap::new() ; | ||
|
||
for rec in fq_reader.records().flatten() { | ||
let gc_count = rec.seq().iter().filter(|x| *x ==&b'G'|| *x==&b'C').count(); | ||
let gc_ratio = (gc_count as f64 / rec.seq().len() as f64 *100.0).round() as u64; | ||
*df_hash.entry(gc_ratio).or_insert(0) +=1usize; | ||
|
||
} | ||
|
||
let mut df_ret = vec![]; | ||
fo.write(format!("gc_content(%)\tratio(%)\n").as_bytes())?; | ||
let total = df_hash.values().sum::<usize>() as f32; | ||
for i in 0..=100 { | ||
let v = (*df_hash.get(&i).unwrap_or(&0) as f32 *10000.0 / total ).round() / 100.0; | ||
df_ret.push(v); | ||
fo.write(format!("{}\t{}\n",i,v).as_bytes())?; | ||
} | ||
|
||
plot_gc(df_ret, prefix, width, height, ylim, types)?; | ||
|
||
info!("time elapsed is: {:?}",start.elapsed()); | ||
Ok(()) | ||
} | ||
|
||
// GC content line plot | ||
fn plot_gc( | ||
data: Vec<f32>, | ||
prefix: String, | ||
width: usize, | ||
height: usize, | ||
ylim: usize, | ||
types: &str | ||
) -> Result<(),Error> { | ||
if !["svg", "png"].contains(&types) { | ||
error!("invalid args types."); | ||
std::process::exit(1); | ||
} | ||
if ylim > 100 { | ||
error!("invalid args ylim."); | ||
std::process::exit(1); | ||
} | ||
let name = if types == "png" {format!("{}.png",prefix)} else {format!("{}.svg",prefix)}; | ||
info!("output gc content plot: {}",name); | ||
|
||
if types == "png" { | ||
let png = BitMapBackend::new(&name, (width as u32, height as u32)).into_drawing_area(); | ||
png.fill(&WHITE)?; | ||
|
||
let mut charts = ChartBuilder::on(&png) | ||
.margin(10) | ||
.caption("GC distrbution plot", ("sans-serif", 30).into_font()) | ||
.x_label_area_size(40) | ||
.y_label_area_size(40) | ||
//.build_cartesian_2d(0..100, 0..ylim)?; | ||
.build_cartesian_2d(0.0..100f32 , 0.0..ylim as f32)?; | ||
|
||
charts | ||
.configure_mesh() | ||
.x_labels(20) | ||
.x_desc("GC CONTENT") | ||
.x_label_formatter(&|x| format!("{:.0}%",x)) | ||
.y_labels(20) | ||
.y_label_formatter(&|x| format!("{:.0}%", x)) | ||
.y_desc("percent") | ||
.draw()?; | ||
|
||
charts.draw_series( | ||
AreaSeries::new((0..).zip(data.iter()).map(|(x,y)| (x as f32, *y)), 0., &GREEN.mix(0.5)).border_style(&GREEN), | ||
)? | ||
.label("GC content") | ||
.legend(|(x, y)| PathElement::new(vec![(x, y), (x + 20, y)], &GREEN)); | ||
|
||
//charts.draw_series( LineSeries::new((0..).zip(data.iter()).map(|(x,y)| (x as f32, *y)), &BLACK) )?; | ||
|
||
charts | ||
.configure_series_labels() | ||
.background_style(&WHITE.mix(0.9)) | ||
.border_style(&BLACK) | ||
.position(SeriesLabelPosition::UpperRight) | ||
.draw()?; | ||
|
||
} else { | ||
let svg = SVGBackend::new(&name, (width as u32, height as u32)).into_drawing_area(); | ||
svg.fill(&WHITE)?; | ||
|
||
let mut charts = ChartBuilder::on(&svg) | ||
.margin(10) | ||
.caption("GC distrbution plot", ("sans-serif", 30).into_font()) | ||
.x_label_area_size(40) | ||
.y_label_area_size(40) | ||
.build_cartesian_2d(0.0..100f32 , 0.0..ylim as f32)?; | ||
|
||
charts | ||
.configure_mesh() | ||
.x_labels(20) | ||
.x_desc("GC CONTENT") | ||
.x_label_formatter(&|x| format!("{:.0}%",x)) | ||
.y_labels(20) | ||
.y_label_formatter(&|x| format!("{:.0}%", x)) | ||
.y_desc("percent") | ||
.draw()?; | ||
|
||
charts.draw_series( | ||
AreaSeries::new((0..).zip(data.iter()).map(|(x,y)| (x as f32, *y)), 0., &GREEN.mix(0.5)).border_style(&GREEN), | ||
)? | ||
.label("GC content") | ||
.legend(|(x, y)| PathElement::new(vec![(x, y), (x + 20, y)], &GREEN)); | ||
|
||
charts | ||
.configure_series_labels() | ||
.background_style(&WHITE.mix(0.9)) | ||
.border_style(&BLACK) | ||
.position(SeriesLabelPosition::UpperRight) | ||
.draw()?; | ||
} | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters