Skip to content

Commit 5a9eb8c

Browse files
committed
Improve AlignedSegment.query_sequence caching
Updating query_sequence did not invalidate cache_query_alignment_sequence previously. Clear caches in __set__ et al and populate them only in __get__ routines.
1 parent 4ccde8e commit 5a9eb8c

File tree

3 files changed

+14
-9
lines changed

3 files changed

+14
-9
lines changed

pysam/libcalignedsegment.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ cdef class AlignedSegment:
4848
cdef void _clear_cached_query_qualities(self)
4949
cdef object cache_query_sequence
5050
cdef object cache_query_alignment_sequence
51+
cdef void _clear_cached_query_sequences(self)
5152

5253
# add an alignment tag with value to the AlignedSegment
5354
# an existing tag of the same name will be replaced.

pysam/libcalignedsegment.pyx

+13-7
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ cdef AlignedSegment makeAlignedSegment(bam1_t *src,
563563
cdef AlignedSegment dest = AlignedSegment.__new__(AlignedSegment)
564564
dest._delegate = bam_dup1(src)
565565
dest.header = header
566+
dest._clear_cached_query_sequences()
566567
dest._clear_cached_query_qualities()
567568
return dest
568569

@@ -947,9 +948,8 @@ cdef class AlignedSegment:
947948
self._delegate.core.mpos = -1
948949

949950
# caching for selected fields
951+
self._clear_cached_query_sequences()
950952
self._clear_cached_query_qualities()
951-
self.cache_query_sequence = None
952-
self.cache_query_alignment_sequence = None
953953

954954
self.header = header
955955

@@ -1097,6 +1097,7 @@ cdef class AlignedSegment:
10971097
cdef AlignedSegment dest = cls.__new__(cls)
10981098
dest._delegate = <bam1_t*>calloc(1, sizeof(bam1_t))
10991099
dest.header = header
1100+
dest._clear_cached_query_sequences()
11001101
dest._clear_cached_query_qualities()
11011102

11021103
cdef kstring_t line
@@ -1379,6 +1380,10 @@ cdef class AlignedSegment:
13791380
def __set__(self, isize):
13801381
self._delegate.core.isize = isize
13811382

1383+
cdef void _clear_cached_query_sequences(self):
1384+
self.cache_query_sequence = NotImplemented
1385+
self.cache_query_alignment_sequence = NotImplemented
1386+
13821387
property query_sequence:
13831388
"""read sequence bases, including :term:`soft clipped` bases
13841389
(None if not present).
@@ -1396,14 +1401,15 @@ cdef class AlignedSegment:
13961401
has aligned the read to the reverse strand.)
13971402
"""
13981403
def __get__(self):
1399-
if self.cache_query_sequence:
1404+
if self.cache_query_sequence is not NotImplemented:
14001405
return self.cache_query_sequence
14011406

14021407
cdef bam1_t * src
14031408
cdef char * s
14041409
src = self._delegate
14051410

14061411
if src.core.l_qseq == 0:
1412+
self.cache_query_sequence = None
14071413
return None
14081414

14091415
self.cache_query_sequence = force_str(getSequenceInRange(
@@ -1460,7 +1466,7 @@ cdef class AlignedSegment:
14601466
p = pysam_bam_get_qual(src)
14611467
memset(p, 0xff, l)
14621468

1463-
self.cache_query_sequence = force_str(seq)
1469+
self._clear_cached_query_sequences()
14641470
self._clear_cached_query_qualities()
14651471

14661472
cdef void _clear_cached_query_qualities(self):
@@ -1765,7 +1771,7 @@ cdef class AlignedSegment:
17651771
"""
17661772

17671773
def __get__(self):
1768-
if self.cache_query_alignment_sequence:
1774+
if self.cache_query_alignment_sequence is not NotImplemented:
17691775
return self.cache_query_alignment_sequence
17701776

17711777
cdef bam1_t * src
@@ -1774,13 +1780,13 @@ cdef class AlignedSegment:
17741780
src = self._delegate
17751781

17761782
if src.core.l_qseq == 0:
1783+
self.cache_query_alignment_sequence = None
17771784
return None
17781785

17791786
start = getQueryStart(src)
17801787
end = getQueryEnd(src)
17811788

1782-
self.cache_query_alignment_sequence = force_str(
1783-
getSequenceInRange(src, start, end))
1789+
self.cache_query_alignment_sequence = force_str(getSequenceInRange(src, start, end))
17841790
return self.cache_query_alignment_sequence
17851791

17861792
property query_alignment_qualities:

tests/AlignedSegment_test.py

-2
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,6 @@ def testClearSequence(self):
253253
a.query_sequence = "*"
254254
self.assertEqual(a.query_length, 0)
255255

256-
@unittest.expectedFailure # Updating query_sequence does not reset cached query_alignment_sequence
257256
def testUpdateSequenceEffects1(self):
258257
a = self.build_read()
259258
a.query_sequence = "ATGCATGC"
@@ -263,7 +262,6 @@ def testUpdateSequenceEffects1(self):
263262
a.query_sequence = "AATTGGCC"
264263
self.assertEqual(a.query_alignment_sequence, "ATTGG")
265264

266-
@unittest.expectedFailure # Clearing query_sequence via "*" caches an incorrect query_sequence
267265
def testUpdateSequenceEffects2(self):
268266
a = self.build_read()
269267
a.query_sequence = "ATGCATGC"

0 commit comments

Comments
 (0)