Skip to content

Commit 8a74898

Browse files
holtgrewe authored and johanneskoester committed
WIP: Starting with BCF header records (#76)
BCF header records
1 parent cfeceed commit 8a74898

File tree

6 files changed

+122
-6
lines changed

6 files changed

+122
-6
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ lazy_static = "0.2"
2323
bitflags = "0.9"
2424
serde = { version = "^1", optional = true }
2525
regex = "0.2"
26+
linear-map = "1.2.0"
2627

2728
[features]
2829
default = []

src/bam/record_serde.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -289,9 +289,9 @@ impl<'de> Deserialize<'de> for Record {
289289

290290
#[cfg(test)]
291291
mod tests {
292-
use bam::record::Record;
293292
use bam::Read;
294293
use bam::Reader;
294+
use bam::record::Record;
295295

296296
use std::path::Path;
297297

src/bcf/header.rs

+92
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ use std::str;
99

1010
use htslib;
1111

12+
use linear_map::LinearMap;
13+
1214
pub type SampleSubset = Vec<i32>;
1315

1416
custom_derive! {
@@ -191,6 +193,38 @@ impl Drop for Header {
191193
}
192194
}
193195

196+
/// A header record.
197+
#[derive(Debug)]
198+
pub enum HeaderRecord {
199+
/// A `FILTER` header record.
200+
Filter {
201+
key: String,
202+
values: LinearMap<String, String>,
203+
},
204+
/// An `INFO` header record.
205+
Info {
206+
key: String,
207+
values: LinearMap<String, String>,
208+
},
209+
/// A `FORMAT` header record.
210+
Format {
211+
key: String,
212+
values: LinearMap<String, String>,
213+
},
214+
/// A `contig` header record.
215+
Contig {
216+
key: String,
217+
values: LinearMap<String, String>,
218+
},
219+
/// A structured header record.
220+
Structured {
221+
key: String,
222+
values: LinearMap<String, String>,
223+
},
224+
/// A generic, unstructured header record.
225+
Generic { key: String, value: String },
226+
}
227+
194228
#[derive(Debug)]
195229
pub struct HeaderView {
196230
pub inner: *mut htslib::bcf_hdr_t,
@@ -340,6 +374,64 @@ impl HeaderView {
340374
};
341375
key.to_bytes().to_vec()
342376
}
377+
378+
/// Return structured `HeaderRecord`s.
379+
pub fn header_records(&self) -> Vec<HeaderRecord> {
380+
fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap<String, String> {
381+
let mut result: LinearMap<String, String> = LinearMap::new();
382+
for i in 0_i32..(rec.nkeys) {
383+
let key = unsafe {
384+
ffi::CStr::from_ptr(*rec.keys.offset(i as isize))
385+
.to_str()
386+
.unwrap()
387+
.to_string()
388+
};
389+
let value = unsafe {
390+
ffi::CStr::from_ptr(*rec.vals.offset(i as isize))
391+
.to_str()
392+
.unwrap()
393+
.to_string()
394+
};
395+
result.insert(key, value);
396+
}
397+
result
398+
}
399+
400+
let mut result: Vec<HeaderRecord> = Vec::new();
401+
for i in 1_i32..unsafe { (*self.inner).nhrec } {
402+
let rec = unsafe { &(**(*self.inner).hrec.offset(i as isize)) };
403+
let key = unsafe { ffi::CStr::from_ptr(rec.key).to_str().unwrap().to_string() };
404+
let record = match rec.type_ {
405+
0 => HeaderRecord::Filter {
406+
key,
407+
values: parse_kv(rec),
408+
},
409+
1 => HeaderRecord::Info {
410+
key,
411+
values: parse_kv(rec),
412+
},
413+
2 => HeaderRecord::Format {
414+
key,
415+
values: parse_kv(rec),
416+
},
417+
3 => HeaderRecord::Contig {
418+
key,
419+
values: parse_kv(rec),
420+
},
421+
4 => HeaderRecord::Structured {
422+
key,
423+
values: parse_kv(rec),
424+
},
425+
5 => HeaderRecord::Generic {
426+
key,
427+
value: unsafe { ffi::CStr::from_ptr(rec.value).to_str().unwrap().to_string() },
428+
},
429+
_ => panic!("Unknown type: {}", rec.type_),
430+
};
431+
result.push(record);
432+
}
433+
result
434+
}
343435
}
344436

345437
impl Clone for HeaderView {

src/bcf/mod.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ pub mod record;
2424
use bcf::header::{HeaderView, SampleSubset};
2525
use htslib;
2626

27-
pub use bcf::buffer::RecordBuffer;
28-
pub use bcf::header::Header;
27+
pub use bcf::header::{Header, HeaderRecord};
2928
pub use bcf::record::Record;
3029

3130
/// Redefinition of corresponding `#define` in `vcf.h`.
@@ -839,6 +838,28 @@ mod tests {
839838
assert!(header.sample_to_id(b"three").is_err());
840839
}
841840

841+
/// Checks that `header_records()` returns the expected number of records
/// for `test/test_string.vcf` and that the first one is `FILTER` with
/// `ID=PASS`.
#[test]
fn test_header_records() {
    let vcf = Reader::from_path(&"test/test_string.vcf")
        .ok()
        .expect("Error opening file.");
    let records = vcf.header().header_records();
    assert_eq!(records.len(), 9);

    match records[0] {
        HeaderRecord::Filter {
            ref key,
            ref values,
        } => {
            assert_eq!(key, "FILTER");
            assert_eq!(values["ID"], "PASS");
        }
        // Report what was actually found instead of a bare `assert!(false)`.
        ref other => panic!("expected a FILTER header record first, got {:?}", other),
    }
}
862+
842863
// Helper function reading full file into string.
843864
fn read_all<P: AsRef<Path>>(path: P) -> String {
844865
let mut file = File::open(path.as_ref())

src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ extern crate quick_error;
8383
extern crate regex;
8484
extern crate url;
8585

86+
extern crate linear_map;
87+
8688
#[cfg(feature = "serde")]
8789
extern crate serde;
8890

src/sam/mod.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ use std::path::Path;
1010

1111
use htslib;
1212

13+
use bam::HeaderView;
1314
use bam::header;
1415
use bam::record;
15-
use bam::HeaderView;
1616

1717
/// SAM writer.
1818
#[derive(Debug)]
@@ -111,10 +111,10 @@ quick_error! {
111111

112112
#[cfg(test)]
113113
mod tests {
114-
use bam::header;
115-
use bam::record;
116114
use bam::Read;
117115
use bam::Reader;
116+
use bam::header;
117+
use bam::record;
118118
use sam::Writer;
119119

120120
#[test]

0 commit comments

Comments
 (0)