Skip to content

Commit 2584e45

Browse files
Merge pull request #36 from rust-bio/improve-cigar-api
Improve CIGAR API.
2 parents f7b2f70 + f564c9a commit 2584e45

File tree

2 files changed

+122
-20
lines changed

2 files changed

+122
-20
lines changed

src/bam/mod.rs

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -580,20 +580,14 @@ pub struct HeaderView {
580580

581581

582582
impl HeaderView {
583+
/// Create a new `HeaderView` from the raw htslib header pointer `inner`, and own it.
///
/// The returned view stores the pointer as-is and is flagged with `owned: true`.
/// NOTE(review): presumably the `Drop` impl releases the header only when `owned`
/// is set — confirm against `impl Drop for HeaderView`.
583584
pub fn new(inner: *mut htslib::bam_hdr_t) -> Self {
584585
HeaderView {
585586
inner: inner,
586587
owned: true,
587588
}
588589
}
589590

590-
fn borrow(inner: *mut htslib::bam_hdr_t) -> Self {
591-
HeaderView {
592-
inner: inner,
593-
owned: false,
594-
}
595-
}
596-
597591
#[inline]
598592
pub fn inner(&self) -> htslib::bam_hdr_t {
599593
unsafe { (*self.inner) }
@@ -652,12 +646,12 @@ impl Drop for HeaderView {
652646
mod tests {
653647
extern crate tempdir;
654648
use super::*;
655-
use super::record::{Cigar,Aux};
649+
use super::record::{Cigar, CigarString, Aux};
656650
use super::header::HeaderRecord;
657651
use std::str;
658652
use std::path::Path;
659653

660-
fn gold() -> ([&'static [u8]; 6], [u16; 6], [&'static [u8]; 6], [&'static [u8]; 6], [[Cigar; 3]; 6]) {
654+
fn gold() -> ([&'static [u8]; 6], [u16; 6], [&'static [u8]; 6], [&'static [u8]; 6], [CigarString; 6]) {
661655
let names = [&b"I"[..], &b"II.14978392"[..], &b"III"[..], &b"IV"[..], &b"V"[..], &b"VI"[..]];
662656
let flags = [16u16, 16u16, 16u16, 16u16, 16u16, 2048u16];
663657
let seqs = [
@@ -677,12 +671,12 @@ mod tests {
677671
&b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"[..],
678672
];
679673
let cigars = [
680-
[Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)],
681-
[Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)],
682-
[Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)],
683-
[Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)],
684-
[Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)],
685-
[Cigar::Match(27), Cigar::Del(100000), Cigar::Match(73)],
674+
CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
675+
CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
676+
CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
677+
CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
678+
CigarString(vec![Cigar::Match(27), Cigar::Del(1), Cigar::Match(73)]),
679+
CigarString(vec![Cigar::Match(27), Cigar::Del(100000), Cigar::Match(73)]),
686680
];
687681
(names, flags, seqs, quals, cigars)
688682
}

src/bam/record.rs

Lines changed: 113 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
use std::slice;
88
use std::ffi;
99
use std::ops;
10+
use std::fmt;
1011

1112
use itertools::Itertools;
1213

@@ -227,10 +228,10 @@ impl Record {
227228
unsafe { slice::from_raw_parts(self.data()[self.qname_len()..].as_ptr() as *const u32, self.cigar_len()) }
228229
}
229230

230-
/// Get cigar sequence. Complexity: O(k) with k being the length of the cigar string.
231-
pub fn cigar(&self) -> Vec<Cigar> {
231+
/// Get cigar string. Complexity: O(k) with k being the length of the cigar string.
232+
pub fn cigar(&self) -> CigarString {
232233
let raw = self.raw_cigar();
233-
raw.iter().map(|&c| {
234+
CigarString(raw.iter().map(|&c| {
234235
let len = c >> 4;
235236
match c & 0b1111 {
236237
0 => Cigar::Match(len),
@@ -245,7 +246,7 @@ impl Record {
245246
9 => Cigar::Back(len),
246247
_ => panic!("Unexpected cigar type"),
247248
}
248-
}).collect()
249+
}).collect())
249250
}
250251

251252
fn seq_len(&self) -> usize {
@@ -453,7 +454,7 @@ unsafe impl<'a> Send for Seq<'a> {}
453454
unsafe impl<'a> Sync for Seq<'a> {}
454455

455456

456-
#[derive(PartialEq, Debug)]
457+
#[derive(PartialEq, Eq, Debug, Clone)]
457458
pub enum Cigar {
458459
Match(u32), // M
459460
Ins(u32), // I
@@ -483,8 +484,115 @@ impl Cigar {
483484
Cigar::Back(len) => len << 4 | 9,
484485
}
485486
}
487+
488+
/// Return the length of this CIGAR operation.
///
/// This is the `u32` payload carried by the variant, whatever the operation kind.
489+
pub fn len(&self) -> u32 {
490+
match *self {
491+
Cigar::Match(len) => len,
492+
Cigar::Ins(len) => len,
493+
Cigar::Del(len) => len,
494+
Cigar::RefSkip(len) => len,
495+
Cigar::SoftClip(len) => len,
496+
Cigar::HardClip(len) => len,
497+
Cigar::Pad(len) => len,
498+
Cigar::Equal(len) => len,
499+
Cigar::Diff(len) => len,
500+
Cigar::Back(len) => len
501+
}
502+
}
503+
504+
/// Return the character representing this CIGAR operation.
///
/// One of `M`, `I`, `D`, `N`, `S`, `H`, `P`, `=`, `X` or `B`; the length is ignored.
505+
pub fn char(&self) -> char {
506+
match *self {
507+
Cigar::Match(_) => 'M',
508+
Cigar::Ins(_) => 'I',
509+
Cigar::Del(_) => 'D',
510+
Cigar::RefSkip(_) => 'N',
511+
Cigar::SoftClip(_) => 'S',
512+
Cigar::HardClip(_) => 'H',
513+
Cigar::Pad(_) => 'P',
514+
Cigar::Equal(_) => '=',
515+
Cigar::Diff(_) => 'X',
516+
Cigar::Back(_) => 'B'
517+
}
518+
}
519+
}
520+
521+
522+
// Formats a single operation as its length immediately followed by its
// character code (e.g. `Cigar::Match(100)` is written as `100M`).
impl fmt::Display for Cigar {
523+
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
524+
fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
525+
}
486526
}
487527

488528

489529
// NOTE(review): every variant matched in `Cigar::len` carries only a `u32`, so
// `Send` and `Sync` should already be implemented automatically; these manual
// `unsafe impl`s look redundant — confirm before removing.
unsafe impl Send for Cigar {}
490530
unsafe impl Sync for Cigar {}
531+
532+
533+
custom_derive! {
534+
/// A CIGAR string. This type wraps around a `Vec<Cigar>`.
///
/// The wrapped vector is public (`.0`). The derives below add dereferencing to
/// the inner vector (`NewtypeDeref`) and `usize` indexing
/// (`NewtypeIndex`, `NewtypeIndexMut`), as used in the example.
535+
///
536+
/// # Example
537+
///
538+
/// ```
539+
/// use rust_htslib::bam::record::{Cigar, CigarString};
540+
///
541+
/// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
542+
///
543+
/// // access by index
544+
/// assert_eq!(cigar[0], Cigar::Match(100));
545+
/// // format into classical string representation
546+
/// assert_eq!(format!("{}", cigar), "100M10S");
547+
/// // iterate
548+
/// for op in &cigar {
549+
/// println!("{}", op);
550+
/// }
551+
/// ```
552+
#[derive(NewtypeDeref,
553+
NewtypeIndex(usize),
554+
NewtypeIndexMut(usize),
555+
PartialEq,
556+
Eq,
557+
NewtypeDebug,
558+
Clone
559+
)]
560+
pub struct CigarString(pub Vec<Cigar>);
561+
}
562+
563+
564+
// Borrowing iteration over a CIGAR string: `for op in &cigar` yields `&Cigar`
// items by delegating to the slice iterator of the wrapped vector (`self.0`).
impl<'a> IntoIterator for &'a CigarString {
565+
type Item = &'a Cigar;
566+
type IntoIter = ::std::slice::Iter<'a, Cigar>;
567+
568+
fn into_iter(self) -> Self::IntoIter {
569+
(&(self.0)).into_iter()
570+
}
571+
}
572+
573+
574+
// Writes every operation as `<length><char>` with no separator between
// operations (e.g. `100M10S`). An empty CIGAR string writes nothing.
impl fmt::Display for CigarString {
575+
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
576+
for op in self {
577+
// Same `<length><char>` layout as `Display for Cigar`; `?` aborts on the first write error.
fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
578+
}
579+
Ok(())
580+
}
581+
}
582+
583+
584+
// Unit tests for the CIGAR types; mirrors the doc example on `CigarString`.
#[cfg(test)]
585+
mod tests {
586+
use super::*;
587+
588+
// Checks indexing, the `Display` output, and borrowing iteration of a `CigarString`.
#[test]
589+
fn test_cigar_string() {
590+
let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
591+
592+
assert_eq!(cigar[0], Cigar::Match(100));
593+
assert_eq!(format!("{}", cigar), "100M10S");
594+
// Only exercises `IntoIterator for &CigarString` and `Display for Cigar`; nothing is asserted here.
for op in &cigar {
595+
println!("{}", op);
596+
}
597+
}
598+
}

0 commit comments

Comments
 (0)