Skip to content

Commit 3a62f87

Browse files
committed
Perf improvements to collections::BitSet.
1 parent 551a74d commit 3a62f87

File tree

1 file changed

+77
-75
lines changed

1 file changed

+77
-75
lines changed

src/libcollections/bit.rs

+77-75
Original file line numberDiff line numberDiff line change
@@ -1451,8 +1451,8 @@ impl BitSet {
14511451
/// ```
14521452
#[inline]
14531453
#[stable(feature = "rust1", since = "1.0.0")]
1454-
pub fn iter(&self) -> bit_set::Iter {
1455-
SetIter {set: self, next_idx: 0}
1454+
pub fn iter<'a>(&'a self) -> bit_set::Iter<'a> {
1455+
SetIter(BlockIter::from_blocks(self.bit_vec.blocks()))
14561456
}
14571457

14581458
/// Iterator over each usize stored in `self` union `other`.
@@ -1477,13 +1477,11 @@ impl BitSet {
14771477
pub fn union<'a>(&'a self, other: &'a BitSet) -> Union<'a> {
14781478
fn or(w1: u32, w2: u32) -> u32 { w1 | w2 }
14791479

1480-
Union(TwoBitPositions {
1481-
set: self,
1482-
other: other,
1480+
Union(BlockIter::from_blocks(TwoBitPositions {
1481+
set: self.bit_vec.blocks(),
1482+
other: other.bit_vec.blocks(),
14831483
merge: or,
1484-
current_word: 0,
1485-
next_idx: 0
1486-
})
1484+
}))
14871485
}
14881486

14891487
/// Iterator over each usize stored in `self` intersect `other`.
@@ -1508,13 +1506,12 @@ impl BitSet {
15081506
pub fn intersection<'a>(&'a self, other: &'a BitSet) -> Intersection<'a> {
15091507
fn bitand(w1: u32, w2: u32) -> u32 { w1 & w2 }
15101508
let min = cmp::min(self.bit_vec.len(), other.bit_vec.len());
1511-
Intersection(TwoBitPositions {
1512-
set: self,
1513-
other: other,
1509+
1510+
Intersection(BlockIter::from_blocks(TwoBitPositions {
1511+
set: self.bit_vec.blocks(),
1512+
other: other.bit_vec.blocks(),
15141513
merge: bitand,
1515-
current_word: 0,
1516-
next_idx: 0
1517-
}.take(min))
1514+
}).take(min))
15181515
}
15191516

15201517
/// Iterator over each usize stored in the `self` setminus `other`.
@@ -1546,13 +1543,11 @@ impl BitSet {
15461543
pub fn difference<'a>(&'a self, other: &'a BitSet) -> Difference<'a> {
15471544
fn diff(w1: u32, w2: u32) -> u32 { w1 & !w2 }
15481545

1549-
Difference(TwoBitPositions {
1550-
set: self,
1551-
other: other,
1546+
Difference(BlockIter::from_blocks(TwoBitPositions {
1547+
set: self.bit_vec.blocks(),
1548+
other: other.bit_vec.blocks(),
15521549
merge: diff,
1553-
current_word: 0,
1554-
next_idx: 0
1555-
})
1550+
}))
15561551
}
15571552

15581553
/// Iterator over each usize stored in the symmetric difference of `self` and `other`.
@@ -1578,13 +1573,11 @@ impl BitSet {
15781573
pub fn symmetric_difference<'a>(&'a self, other: &'a BitSet) -> SymmetricDifference<'a> {
15791574
fn bitxor(w1: u32, w2: u32) -> u32 { w1 ^ w2 }
15801575

1581-
SymmetricDifference(TwoBitPositions {
1582-
set: self,
1583-
other: other,
1576+
SymmetricDifference(BlockIter::from_blocks(TwoBitPositions {
1577+
set: self.bit_vec.blocks(),
1578+
other: other.bit_vec.blocks(),
15841579
merge: bitxor,
1585-
current_word: 0,
1586-
next_idx: 0
1587-
})
1580+
}))
15881581
}
15891582

15901583
/// Unions in-place with the specified other bit vector.
@@ -1808,98 +1801,107 @@ impl hash::Hash for BitSet {
18081801
}
18091802
}
18101803

1811-
/// An iterator for `BitSet`.
18121804
#[derive(Clone)]
18131805
#[stable(feature = "rust1", since = "1.0.0")]
1814-
pub struct SetIter<'a> {
1815-
set: &'a BitSet,
1816-
next_idx: usize
1806+
struct BlockIter<T> where
1807+
T: Iterator<Item=u32> {
1808+
head: u32,
1809+
head_offset: usize,
1810+
tail: T
1811+
}
1812+
impl<'a, T> BlockIter<T> where
1813+
T: Iterator<Item=u32> {
1814+
fn from_blocks(mut blocks: T) -> BlockIter<T> {
1815+
let h = blocks.next().unwrap_or(0);
1816+
BlockIter {tail: blocks, head: h, head_offset: 0}
1817+
}
18171818
}
18181819

18191820
/// An iterator that walks the `u32` blocks of two `BitSet`s in lockstep and yields each pair combined by `merge`.
18201821
#[derive(Clone)]
18211822
struct TwoBitPositions<'a> {
1822-
set: &'a BitSet,
1823-
other: &'a BitSet,
1823+
set: Blocks<'a>,
1824+
other: Blocks<'a>,
18241825
merge: fn(u32, u32) -> u32,
1825-
current_word: u32,
1826-
next_idx: usize
18271826
}
18281827

1828+
/// An iterator for `BitSet`.
1829+
#[derive(Clone)]
1830+
#[stable(feature = "rust1", since = "1.0.0")]
1831+
pub struct SetIter<'a>(BlockIter<Blocks<'a>>);
18291832
#[derive(Clone)]
18301833
#[stable(feature = "rust1", since = "1.0.0")]
1831-
pub struct Union<'a>(TwoBitPositions<'a>);
1834+
pub struct Union<'a>(BlockIter<TwoBitPositions<'a>>);
18321835
#[derive(Clone)]
18331836
#[stable(feature = "rust1", since = "1.0.0")]
1834-
pub struct Intersection<'a>(Take<TwoBitPositions<'a>>);
1837+
pub struct Intersection<'a>(Take<BlockIter<TwoBitPositions<'a>>>);
18351838
#[derive(Clone)]
18361839
#[stable(feature = "rust1", since = "1.0.0")]
1837-
pub struct Difference<'a>(TwoBitPositions<'a>);
1840+
pub struct Difference<'a>(BlockIter<TwoBitPositions<'a>>);
18381841
#[derive(Clone)]
18391842
#[stable(feature = "rust1", since = "1.0.0")]
1840-
pub struct SymmetricDifference<'a>(TwoBitPositions<'a>);
1843+
pub struct SymmetricDifference<'a>(BlockIter<TwoBitPositions<'a>>);
18411844

18421845
#[stable(feature = "rust1", since = "1.0.0")]
1843-
impl<'a> Iterator for SetIter<'a> {
1846+
impl<'a, T> Iterator for BlockIter<T> where T: Iterator<Item=u32> {
18441847
type Item = usize;
18451848

18461849
fn next(&mut self) -> Option<usize> {
1847-
while self.next_idx < self.set.bit_vec.len() {
1848-
let idx = self.next_idx;
1849-
self.next_idx += 1;
1850-
1851-
if self.set.contains(&idx) {
1852-
return Some(idx);
1850+
while self.head == 0 {
1851+
match self.tail.next() {
1852+
Some(w) => self.head = w,
1853+
_ => return None
18531854
}
1855+
self.head_offset += u32::BITS;
18541856
}
18551857

1856-
return None;
1858+
let t = self.head & !self.head + 1;
1859+
// clear the lowest set bit of `head`
1860+
self.head &= self.head - 1;
1861+
// return the index of the lowest set bit (isolated in `t` above)
1862+
Some(self.head_offset + (u32::count_ones(t-1) as usize))
18571863
}
18581864

18591865
#[inline]
18601866
fn size_hint(&self) -> (usize, Option<usize>) {
1861-
(0, Some(self.set.bit_vec.len() - self.next_idx))
1867+
match self.tail.size_hint() {
1868+
(_, Some(h)) => (0, Some(1 + h * (u32::BITS as usize))),
1869+
_ => (0, None)
1870+
}
18621871
}
18631872
}
18641873

18651874
#[stable(feature = "rust1", since = "1.0.0")]
18661875
impl<'a> Iterator for TwoBitPositions<'a> {
1867-
type Item = usize;
1868-
1869-
fn next(&mut self) -> Option<usize> {
1870-
while self.next_idx < self.set.bit_vec.len() ||
1871-
self.next_idx < self.other.bit_vec.len() {
1872-
let bit_idx = self.next_idx % u32::BITS;
1873-
if bit_idx == 0 {
1874-
let s_bit_vec = &self.set.bit_vec;
1875-
let o_bit_vec = &self.other.bit_vec;
1876-
// Merging the two words is a bit of an awkward dance since
1877-
// one BitVec might be longer than the other
1878-
let word_idx = self.next_idx / u32::BITS;
1879-
let w1 = if word_idx < s_bit_vec.storage.len() {
1880-
s_bit_vec.storage[word_idx]
1881-
} else { 0 };
1882-
let w2 = if word_idx < o_bit_vec.storage.len() {
1883-
o_bit_vec.storage[word_idx]
1884-
} else { 0 };
1885-
self.current_word = (self.merge)(w1, w2);
1886-
}
1887-
1888-
self.next_idx += 1;
1889-
if self.current_word & (1 << bit_idx) != 0 {
1890-
return Some(self.next_idx - 1);
1891-
}
1876+
type Item = u32;
1877+
1878+
fn next(&mut self) -> Option<u32> {
1879+
match (self.set.next(), self.other.next()) {
1880+
(Some(a), Some(b)) => Some((self.merge)(a, b)),
1881+
(Some(a), None) => Some((self.merge)(a, 0)),
1882+
(None, Some(b)) => Some((self.merge)(0, b)),
1883+
_ => return None
18921884
}
1893-
return None;
18941885
}
18951886

18961887
#[inline]
18971888
fn size_hint(&self) -> (usize, Option<usize>) {
1898-
let cap = cmp::max(self.set.bit_vec.len(), self.other.bit_vec.len());
1899-
(0, Some(cap - self.next_idx))
1889+
let (a, al) = self.set.size_hint();
1890+
let (b, bl) = self.set.size_hint();
1891+
1892+
assert_eq!(a, b);
1893+
(a, cmp::max(al, bl))
19001894
}
19011895
}
19021896

1897+
#[stable(feature = "rust1", since = "1.0.0")]
1898+
impl<'a> Iterator for SetIter<'a> {
1899+
type Item = usize;
1900+
1901+
#[inline] fn next(&mut self) -> Option<usize> { self.0.next() }
1902+
#[inline] fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
1903+
}
1904+
19031905
#[stable(feature = "rust1", since = "1.0.0")]
19041906
impl<'a> Iterator for Union<'a> {
19051907
type Item = usize;

0 commit comments

Comments
 (0)