@@ -32,7 +32,9 @@ use crate::utils::path_as_bytes;
32
32
pub use crate :: bam:: buffer:: RecordBuffer ;
33
33
pub use crate :: bam:: header:: Header ;
34
34
pub use crate :: bam:: record:: Record ;
35
- use std:: convert:: TryInto ;
35
+ use hts_sys:: { hts_fmt_option, sam_fields} ;
36
+ use std:: convert:: { TryFrom , TryInto } ;
37
+ use std:: mem:: MaybeUninit ;
36
38
37
39
/// # Safety
38
40
///
@@ -106,7 +108,7 @@ pub trait Read: Sized {
106
108
/// match result {
107
109
/// Ok(_) => {
108
110
/// println!("Read sequence: {:?}", record.seq().as_bytes());
109
- /// },
111
+ /// }
110
112
/// Err(_) => panic!("BAM parsing failed...")
111
113
/// }
112
114
/// }
@@ -217,6 +219,31 @@ pub trait Read: Sized {
217
219
///
218
220
/// * `tpool` - thread pool to use for compression work.
219
221
fn set_thread_pool ( & mut self , tpool : & ThreadPool ) -> Result < ( ) > ;
222
+
223
+ /// If the underlying file is in CRAM format, allows modifying CRAM options.
224
+ /// Note that this method does *not* check that the underlying file actually is in CRAM format.
225
+ ///
226
+ /// # Examples
227
+ ///
228
+ /// Set the required fields to RNAME and FLAG,
229
+ /// potentially allowing htslib to skip over the rest,
230
+ /// resulting in faster iteration:
231
+ /// ```
232
+ /// use rust_htslib::bam::{Read, Reader};
233
+ /// use hts_sys;
234
+ /// let mut cram = Reader::from_path(&"test/test_cram.cram").unwrap();
235
+ /// cram.set_cram_options(hts_sys::hts_fmt_option_CRAM_OPT_REQUIRED_FIELDS,
236
+ /// hts_sys::sam_fields_SAM_RNAME | hts_sys::sam_fields_SAM_FLAG).unwrap();
237
+ /// ```
238
+ fn set_cram_options ( & mut self , fmt_opt : hts_fmt_option , fields : sam_fields ) -> Result < ( ) > {
239
+ unsafe {
240
+ if hts_sys:: hts_set_opt ( self . htsfile ( ) , fmt_opt, fields) != 0 {
241
+ Err ( Error :: HtsSetOpt )
242
+ } else {
243
+ Ok ( ( ) )
244
+ }
245
+ }
246
+ }
220
247
}
221
248
222
249
/// A BAM reader.
@@ -471,6 +498,7 @@ pub enum FetchDefinition<'a> {
471
498
/// Only reads with the BAM flag BAM_FUNMAP (which might not be all reads with reference = -1)
472
499
Unmapped ,
473
500
}
501
+
474
502
impl < ' a , X : Into < FetchCoordinate > , Y : Into < FetchCoordinate > > From < ( i32 , X , Y ) >
475
503
for FetchDefinition < ' a >
476
504
{
@@ -480,6 +508,7 @@ impl<'a, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> From<(i32, X, Y)>
480
508
FetchDefinition :: Region ( tup. 0 , start. 0 , stop. 0 )
481
509
}
482
510
}
511
+
483
512
impl < ' a , X : Into < FetchCoordinate > , Y : Into < FetchCoordinate > > From < ( u32 , X , Y ) >
484
513
for FetchDefinition < ' a >
485
514
{
@@ -563,7 +592,7 @@ impl<'a, T: AsRef<[u8]>, X: Into<FetchCoordinate>, Y: Into<FetchCoordinate>> Fro
563
592
pub struct IndexedReader {
564
593
htsfile : * mut htslib:: htsFile ,
565
594
header : Rc < HeaderView > ,
566
- idx : * mut htslib :: hts_idx_t ,
595
+ idx : Rc < IndexView > ,
567
596
itr : Option < * mut htslib:: hts_itr_t > ,
568
597
tpool : Option < ThreadPool > ,
569
598
}
@@ -609,7 +638,7 @@ impl IndexedReader {
609
638
Ok ( IndexedReader {
610
639
htsfile,
611
640
header : Rc :: new ( HeaderView :: new ( header) ) ,
612
- idx,
641
+ idx : Rc :: new ( IndexView :: new ( idx ) ) ,
613
642
itr : None ,
614
643
tpool : None ,
615
644
} )
@@ -637,7 +666,7 @@ impl IndexedReader {
637
666
Ok ( IndexedReader {
638
667
htsfile,
639
668
header : Rc :: new ( HeaderView :: new ( header) ) ,
640
- idx,
669
+ idx : Rc :: new ( IndexView :: new ( idx ) ) ,
641
670
itr : None ,
642
671
tpool : None ,
643
672
} )
@@ -706,12 +735,7 @@ impl IndexedReader {
706
735
match tid {
707
736
Some ( tid) => {
708
737
//'large position' spec says target len must will fit into an i64.
709
- let len: i64 = self
710
- . header
711
- . target_len ( tid as u32 )
712
- . unwrap ( )
713
- . try_into ( )
714
- . unwrap ( ) ;
738
+ let len: i64 = self . header . target_len ( tid) . unwrap ( ) . try_into ( ) . unwrap ( ) ;
715
739
self . _fetch_by_coord_tuple ( tid as i32 , 0 , len)
716
740
}
717
741
None => self . _fetch_by_str ( s) ,
@@ -726,7 +750,7 @@ impl IndexedReader {
726
750
if let Some ( itr) = self . itr {
727
751
unsafe { htslib:: hts_itr_destroy ( itr) }
728
752
}
729
- let itr = unsafe { htslib:: sam_itr_queryi ( self . idx , tid as i32 , beg as i64 , end as i64 ) } ;
753
+ let itr = unsafe { htslib:: sam_itr_queryi ( self . index ( ) . inner_ptr ( ) , tid, beg, end) } ;
730
754
if itr. is_null ( ) {
731
755
self . itr = None ;
732
756
Err ( Error :: Fetch )
@@ -744,7 +768,7 @@ impl IndexedReader {
744
768
let rptr = rstr. as_ptr ( ) ;
745
769
let itr = unsafe {
746
770
htslib:: sam_itr_querys (
747
- self . idx ,
771
+ self . index ( ) . inner_ptr ( ) ,
748
772
self . header ( ) . inner_ptr ( ) as * mut hts_sys:: sam_hdr_t ,
749
773
rptr,
750
774
)
@@ -783,6 +807,163 @@ impl IndexedReader {
783
807
pub fn set_reference < P : AsRef < Path > > ( & mut self , path : P ) -> Result < ( ) > {
784
808
unsafe { set_fai_filename ( self . htsfile , path) }
785
809
}
810
+
811
+ pub fn index ( & self ) -> & IndexView {
812
+ & self . idx
813
+ }
814
+
815
+ // Analogous to slow_idxstats in samtools, see
816
+ // https://github.com/samtools/samtools/blob/556c60fdff977c0e6cadc4c2581661f187098b4d/bam_index.c#L140-L199
817
+ unsafe fn slow_idxstats ( & mut self ) -> Result < Vec < ( i64 , u64 , u64 , u64 ) > > {
818
+ self . set_cram_options (
819
+ hts_sys:: hts_fmt_option_CRAM_OPT_REQUIRED_FIELDS,
820
+ hts_sys:: sam_fields_SAM_RNAME | hts_sys:: sam_fields_SAM_FLAG,
821
+ ) ?;
822
+ let header = self . header ( ) ;
823
+ let h = header. inner ;
824
+ let mut ret;
825
+ let mut last_tid = -2 ;
826
+ let fp = self . htsfile ( ) ;
827
+
828
+ let nref =
829
+ usize:: try_from ( hts_sys:: sam_hdr_nref ( h) ) . map_err ( |_| Error :: NoSequencesInReference ) ?;
830
+ if nref == 0 {
831
+ return Ok ( vec ! [ ] ) ;
832
+ }
833
+ let mut counts = vec ! [ vec![ 0 ; 2 ] ; nref + 1 ] ;
834
+ let mut bb: hts_sys:: bam1_t = MaybeUninit :: zeroed ( ) . assume_init ( ) ;
835
+ let b = & mut bb as * mut hts_sys:: bam1_t ;
836
+ loop {
837
+ ret = hts_sys:: sam_read1 ( fp, h, b) ;
838
+ if ret < 0 {
839
+ break ;
840
+ }
841
+ let tid = ( * b) . core . tid ;
842
+ if tid >= nref as i32 || tid < -1 {
843
+ return Err ( Error :: InvalidTid { tid } ) ;
844
+ }
845
+
846
+ if tid != last_tid {
847
+ if ( last_tid >= -1 ) && ( counts[ tid as usize ] [ 0 ] + counts[ tid as usize ] [ 1 ] ) > 0 {
848
+ return Err ( Error :: BamUnsorted ) ;
849
+ }
850
+ last_tid = tid;
851
+ }
852
+
853
+ let idx = if ( ( * b) . core . flag as u32 & hts_sys:: BAM_FUNMAP ) > 0 {
854
+ 1
855
+ } else {
856
+ 0
857
+ } ;
858
+ counts[ ( * b) . core . tid as usize ] [ idx] += 1 ;
859
+ }
860
+
861
+ if ret == -1 {
862
+ let res = ( 0 ..nref)
863
+ . map ( |i| {
864
+ (
865
+ i as i64 ,
866
+ header. target_len ( i as u32 ) . unwrap ( ) ,
867
+ counts[ i] [ 0 ] ,
868
+ counts[ i] [ 1 ] ,
869
+ )
870
+ } )
871
+ . chain ( [ ( -1 , 0 , counts[ nref] [ 0 ] , counts[ nref] [ 1 ] ) ] )
872
+ . collect ( ) ;
873
+ Ok ( res)
874
+ } else {
875
+ Err ( Error :: SlowIdxStats )
876
+ }
877
+ }
878
+
879
+ /// Similar to samtools idxstats, this returns a vector of tuples
880
+ /// containing the target id, length, number of mapped reads, and number of unmapped reads.
881
+ /// The last entry in the vector corresponds to the unmapped reads for the entire file, with
882
+ /// the tid set to -1.
883
+ pub fn index_stats ( & mut self ) -> Result < Vec < ( i64 , u64 , u64 , u64 ) > > {
884
+ let header = self . header ( ) ;
885
+ let index = self . index ( ) ;
886
+ if index. inner_ptr ( ) . is_null ( ) {
887
+ panic ! ( "Index is null" ) ;
888
+ }
889
+ // the quick index stats method only works for BAM files, not SAM or CRAM
890
+ unsafe {
891
+ if ( * self . htsfile ( ) ) . format . format != htslib:: htsExactFormat_bam {
892
+ return self . slow_idxstats ( ) ;
893
+ }
894
+ }
895
+ Ok ( ( 0 ..header. target_count ( ) )
896
+ . map ( |tid| {
897
+ let ( mapped, unmapped) = index. number_mapped_unmapped ( tid) ;
898
+ let tlen = header. target_len ( tid) . unwrap ( ) ;
899
+ ( tid as i64 , tlen, mapped, unmapped)
900
+ } )
901
+ . chain ( [ ( -1 , 0 , 0 , index. number_unmapped ( ) ) ] )
902
+ . collect :: < _ > ( ) )
903
+ }
904
+ }
905
+
906
+ #[ derive( Debug ) ]
907
+ pub struct IndexView {
908
+ inner : * mut hts_sys:: hts_idx_t ,
909
+ owned : bool ,
910
+ }
911
+
912
+ impl IndexView {
913
+ fn new ( hts_idx : * mut hts_sys:: hts_idx_t ) -> Self {
914
+ Self {
915
+ inner : hts_idx,
916
+ owned : true ,
917
+ }
918
+ }
919
+
920
+ #[ inline]
921
+ pub fn inner ( & self ) -> & hts_sys:: hts_idx_t {
922
+ unsafe { self . inner . as_ref ( ) . unwrap ( ) }
923
+ }
924
+
925
+ #[ inline]
926
+ // Pointer to inner hts_idx_t struct
927
+ pub fn inner_ptr ( & self ) -> * const hts_sys:: hts_idx_t {
928
+ self . inner
929
+ }
930
+
931
+ #[ inline]
932
+ pub fn inner_mut ( & mut self ) -> & mut hts_sys:: hts_idx_t {
933
+ unsafe { self . inner . as_mut ( ) . unwrap ( ) }
934
+ }
935
+
936
+ #[ inline]
937
+ // Mutable pointer to hts_idx_t struct
938
+ pub fn inner_ptr_mut ( & mut self ) -> * mut hts_sys:: hts_idx_t {
939
+ self . inner
940
+ }
941
+
942
+ /// Get the number of mapped and unmapped reads for a given target id
943
+ /// FIXME only valid for BAM, not SAM/CRAM
944
+ fn number_mapped_unmapped ( & self , tid : u32 ) -> ( u64 , u64 ) {
945
+ let ( mut mapped, mut unmapped) = ( 0 , 0 ) ;
946
+ unsafe {
947
+ hts_sys:: hts_idx_get_stat ( self . inner , tid as i32 , & mut mapped, & mut unmapped) ;
948
+ }
949
+ ( mapped, unmapped)
950
+ }
951
+
952
+ /// Get the total number of unmapped reads in the file
953
+ /// FIXME only valid for BAM, not SAM/CRAM
954
+ fn number_unmapped ( & self ) -> u64 {
955
+ unsafe { hts_sys:: hts_idx_get_n_no_coor ( self . inner ) }
956
+ }
957
+ }
958
+
959
+ impl Drop for IndexView {
960
+ fn drop ( & mut self ) {
961
+ if self . owned {
962
+ unsafe {
963
+ htslib:: hts_idx_destroy ( self . inner ) ;
964
+ }
965
+ }
966
+ }
786
967
}
787
968
788
969
impl Read for IndexedReader {
@@ -851,7 +1032,6 @@ impl Drop for IndexedReader {
851
1032
if self . itr . is_some ( ) {
852
1033
htslib:: hts_itr_destroy ( self . itr . unwrap ( ) ) ;
853
1034
}
854
- htslib:: hts_idx_destroy ( self . idx ) ;
855
1035
htslib:: hts_close ( self . htsfile ) ;
856
1036
}
857
1037
}
@@ -1615,6 +1795,7 @@ CCCCCCCCCCCCCCCCCCC"[..],
1615
1795
let bam = IndexedReader :: from_path ( & "test/test.bam" ) . expect ( "Expected valid index." ) ;
1616
1796
_test_read_indexed_common ( bam) ;
1617
1797
}
1798
+
1618
1799
#[ test]
1619
1800
fn test_read_indexed_different_index_name ( ) {
1620
1801
let bam = IndexedReader :: from_path_and_index (
@@ -2818,4 +2999,80 @@ CCCCCCCCCCCCCCCCCCC"[..],
2818
2999
assert_eq ! ( header_refseqs[ 0 ] . get( "SN" ) . unwrap( ) , "ref_1" , ) ;
2819
3000
assert_eq ! ( header_refseqs[ 0 ] . get( "LN" ) . unwrap( ) , "10000000" , ) ;
2820
3001
}
3002
+
3003
+ #[ test]
3004
+ fn test_idxstats_bam ( ) {
3005
+ let mut reader = IndexedReader :: from_path ( "test/test.bam" ) . unwrap ( ) ;
3006
+ let expected = vec ! [
3007
+ ( 0 , 15072423 , 6 , 0 ) ,
3008
+ ( 1 , 15279345 , 0 , 0 ) ,
3009
+ ( 2 , 13783700 , 0 , 0 ) ,
3010
+ ( 3 , 17493793 , 0 , 0 ) ,
3011
+ ( 4 , 20924149 , 0 , 0 ) ,
3012
+ ( -1 , 0 , 0 , 0 ) ,
3013
+ ] ;
3014
+ let actual = reader. index_stats ( ) . unwrap ( ) ;
3015
+ assert_eq ! ( expected, actual) ;
3016
+ }
3017
+
3018
+ #[ test]
3019
+ fn test_number_mapped_and_unmapped_bam ( ) {
3020
+ let reader = IndexedReader :: from_path ( "test/test.bam" ) . unwrap ( ) ;
3021
+ let expected = ( 6 , 0 ) ;
3022
+ let actual = reader. index ( ) . number_mapped_unmapped ( 0 ) ;
3023
+ assert_eq ! ( expected, actual) ;
3024
+ }
3025
+
3026
+ #[ test]
3027
+ fn test_number_unmapped_global_bam ( ) {
3028
+ let reader = IndexedReader :: from_path ( "test/test_unmapped.bam" ) . unwrap ( ) ;
3029
+ let expected = 8 ;
3030
+ let actual = reader. index ( ) . number_unmapped ( ) ;
3031
+ assert_eq ! ( expected, actual) ;
3032
+ }
3033
+
3034
+ #[ test]
3035
+ fn test_idxstats_cram ( ) {
3036
+ let mut reader = IndexedReader :: from_path ( "test/test_cram.cram" ) . unwrap ( ) ;
3037
+ reader. set_reference ( "test/test_cram.fa" ) . unwrap ( ) ;
3038
+ let expected = vec ! [
3039
+ ( 0 , 120 , 2 , 0 ) ,
3040
+ ( 1 , 120 , 2 , 0 ) ,
3041
+ ( 2 , 120 , 2 , 0 ) ,
3042
+ ( -1 , 0 , 0 , 0 ) ,
3043
+ ] ;
3044
+ let actual = reader. index_stats ( ) . unwrap ( ) ;
3045
+ assert_eq ! ( expected, actual) ;
3046
+ }
3047
+
3048
+ #[ test]
3049
+ fn test_slow_idxstats_cram ( ) {
3050
+ let mut reader = IndexedReader :: from_path ( "test/test_cram.cram" ) . unwrap ( ) ;
3051
+ reader. set_reference ( "test/test_cram.fa" ) . unwrap ( ) ;
3052
+ let expected = vec ! [
3053
+ ( 0 , 120 , 2 , 0 ) ,
3054
+ ( 1 , 120 , 2 , 0 ) ,
3055
+ ( 2 , 120 , 2 , 0 ) ,
3056
+ ( -1 , 0 , 0 , 0 ) ,
3057
+ ] ;
3058
+ let actual = reader. index_stats ( ) . unwrap ( ) ;
3059
+ assert_eq ! ( expected, actual) ;
3060
+ }
3061
+
3062
+ // #[test]
3063
+ // fn test_number_mapped_and_unmapped_cram() {
3064
+ // let mut reader = IndexedReader::from_path("test/test_cram.cram").unwrap();
3065
+ // reader.set_reference("test/test_cram.fa").unwrap();
3066
+ // let expected = (2, 0);
3067
+ // let actual = reader.index().number_mapped_unmapped(0);
3068
+ // assert_eq!(expected, actual);
3069
+ // }
3070
+ //
3071
+ // #[test]
3072
+ // fn test_number_unmapped_global_cram() {
3073
+ // let mut reader = IndexedReader::from_path("test/test_unmapped.cram").unwrap();
3074
+ // let expected = 8;
3075
+ // let actual = reader.index().number_unmapped();
3076
+ // assert_eq!(expected, actual);
3077
+ // }
2821
3078
}
0 commit comments