Skip to content

Commit a9a0caf

Browse files
committed
BREAKING CHANGE: introduce Bitmap for handling bitfield operations
Closes: https://github.com/ipfs/go-hamt-ipld/issues/54
1 parent 7597825 commit a9a0caf

File tree

7 files changed

+499
-156
lines changed

7 files changed

+499
-156
lines changed

Diff for: bitmap.go

+115
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
1+
package hamt
2+
3+
import (
4+
"fmt"
5+
"math"
6+
"math/bits"
7+
)
8+
9+
// Bitmap is a managed bitmap, primarily for the purpose of tracking the
// presence or absence of elements in an associated array. It can set and unset
// individual bits and perform limited popcount for a given index to calculate
// the position in the associated compacted array.
//
// Bytes are addressed in big-endian order: bit position 0 is the least
// significant bit of the last byte of Bytes.
type Bitmap struct {
	// Bytes is the raw backing storage of the bitmap.
	Bytes []byte
}

// NewBitmap creates a new, zeroed Bitmap for a given bitWidth. The bitmap will
// hold 2^bitWidth bits, i.e. (2^bitWidth)/8 bytes. It panics when bitWidth is
// below 3, because such a bitmap would not fill a single byte.
func NewBitmap(bitWidth int) *Bitmap {
	bc := (1 << uint(bitWidth)) / 8
	if bc == 0 {
		panic("bitWidth too small")
	}

	return NewBitmapFrom(make([]byte, bc))
}

// NewBitmapFrom creates a new Bitmap from an existing byte array. It is
// assumed that bytes is the correct length for the bitWidth of this Bitmap.
// The slice is used directly as the backing storage (it is not copied), so
// later calls to Set modify the caller's slice. It panics if bytes is empty.
func NewBitmapFrom(bytes []byte) *Bitmap {
	if len(bytes) == 0 {
		panic("can't form Bitmap from zero bytes")
	}
	return &Bitmap{Bytes: bytes}
}

// BitWidth calculates the bitWidth of this Bitmap by performing a
// log2(bits), truncated to an integer. The bitWidth is the minimum number of
// bits required to form indexes that address all of this Bitmap. e.g. a
// bitWidth of 5 can form indexes of 0 to 31, i.e. 4 bytes.
func (bm *Bitmap) BitWidth() int {
	return int(math.Log2(float64(len(bm.Bytes) * 8)))
}

// bindex translates a logical byte index (0 is the byte holding bit positions
// 0 to 7) into an offset into bm.Bytes. It panics with a descriptive message
// if the logical index does not fall inside the bitmap.
func (bm *Bitmap) bindex(in int) int {
	// Return `in` to flip the byte addressing order to LE. For BE we address
	// from the last byte backward.
	bi := len(bm.Bytes) - 1 - in
	// bi == len(bm.Bytes) (in == -1) is already out of range, so the upper
	// bound must be inclusive; otherwise the caller hits a raw runtime
	// index-out-of-range panic instead of this message.
	if bi >= len(bm.Bytes) || bi < 0 {
		panic(fmt.Sprintf("invalid index for this Bitmap (index: %v, bytes: %v)", in, len(bm.Bytes)))
	}
	return bi
}

// IsSet indicates whether the bit at the provided position is set or not.
// It panics if position addresses a byte outside of the bitmap.
func (bm *Bitmap) IsSet(position int) bool {
	byt := bm.bindex(position / 8)
	offset := uint(position % 8)
	return (bm.Bytes[byt]>>offset)&1 == 1
}

// Set sets or unsets the bit at the given position accordingly. If set is
// true, the bit will be set. If set is false, the bit will be unset. It panics
// if position addresses a byte outside of the bitmap.
func (bm *Bitmap) Set(position int, set bool) {
	byt := bm.bindex(position / 8)
	mask := byte(1) << uint(position%8)

	if set {
		bm.Bytes[byt] |= mask
	} else {
		// AND NOT clears the bit and is a no-op when it is already clear.
		bm.Bytes[byt] &^= mask
	}
}

// Index performs a limited popcount up to the given position. This calculates
// the number of set bits at positions strictly below position. Useful for
// calculating the position of an element in an associated compacted array.
func (bm *Bitmap) Index(position int) int {
	t := 0
	eb := position / 8
	for byt := 0; byt < eb; byt++ {
		// quick popcount for the full bytes
		t += bits.OnesCount8(bm.Bytes[bm.bindex(byt)])
	}
	if rem := position % 8; rem > 0 {
		// popcount of only the low `rem` bits of the partially covered byte
		low := byte(1)<<uint(rem) - 1
		t += bits.OnesCount8(bm.Bytes[bm.bindex(eb)] & low)
	}
	return t
}

// Copy creates a clone of the Bitmap, creating a new byte array with the same
// contents as the original.
func (bm *Bitmap) Copy() *Bitmap {
	ba := make([]byte, len(bm.Bytes))
	copy(ba, bm.Bytes)
	return NewBitmapFrom(ba)
}

// BitsSetCount counts how many bits are set in the bitmap.
func (bm *Bitmap) BitsSetCount() int {
	count := 0
	for _, b := range bm.Bytes {
		count += bits.OnesCount8(b)
	}
	return count
}

Diff for: bitmap_test.go

+250
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,250 @@
1+
package hamt
2+
3+
import (
4+
"bytes"
5+
"testing"
6+
)
7+
8+
// many cases taken from https://github.com/rvagg/iamap/blob/fad95295b013c8b4f0faac6dd5d9be175f6e606c/test/bit-utils-test.js
9+
// but rev() is used to reverse the data in most instances
10+
11+
// rev returns a new slice holding the bytes of in in reverse order; in itself
// is left untouched. The test cases are written least-significant byte first,
// so they are reversed to match the big-endian layout of Bitmap.
func rev(in []byte) []byte {
	out := make([]byte, len(in))
	for i := 0; i < len(in); i++ {
		out[len(in)-1-i] = in[i]
	}
	return out
}
19+
20+
// bb converts an 8-character binary string (most significant bit first) into
// a byte. Only '1' characters contribute; any other character is read as 0.
// It exists because binary literals are not available in old Go.
func bb(s string) byte {
	var r byte
	for i, c := range s {
		if c == '1' {
			// character 0 is the most significant bit (bit 7)
			r |= 1 << uint(7-i)
		}
	}
	return r
}
30+
31+
func TestBitmapHas(t *testing.T) {
32+
type tcase struct {
33+
bytes []byte
34+
pos int
35+
set bool
36+
}
37+
cases := []tcase{
38+
{b(0x0), 0, false},
39+
{b(0x1), 0, true},
40+
{b(bb("00101010")), 2, false},
41+
{b(bb("00101010")), 3, true},
42+
{b(bb("00101010")), 4, false},
43+
{b(bb("00101010")), 5, true},
44+
{b(bb("00100000")), 5, true},
45+
{[]byte{0x0, bb("00100000")}, 8 + 5, true},
46+
{[]byte{0x0, 0x0, bb("00100000")}, 8*2 + 5, true},
47+
{[]byte{0x0, 0x0, 0x0, bb("00100000")}, 8*3 + 5, true},
48+
{[]byte{0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*4 + 5, true},
49+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*5 + 5, true},
50+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*4 + 5, false},
51+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*3 + 5, false},
52+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*2 + 5, false},
53+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8 + 5, false},
54+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 5, false},
55+
}
56+
57+
for _, c := range cases {
58+
bm := NewBitmapFrom(rev(c.bytes))
59+
if bm.IsSet(c.pos) != c.set {
60+
t.Fatalf("bitmap %v IsSet(%v) should be %v", c.bytes, c.pos, c.set)
61+
}
62+
}
63+
}
64+
65+
func TestBitmapBitWidth(t *testing.T) {
66+
for i := 3; i <= 16; i++ {
67+
if NewBitmap(i).BitWidth() != i {
68+
t.Fatal("incorrect bitWidth calculation")
69+
}
70+
if NewBitmapFrom(make([]byte, (1<<i)/8)).BitWidth() != i {
71+
t.Fatal("incorrect bitWidth calculation")
72+
}
73+
}
74+
}
75+
76+
func TestBitmapIndex(t *testing.T) {
77+
type tcase struct {
78+
bytes []byte
79+
pos int
80+
expected int
81+
}
82+
cases := []tcase{
83+
{b(bb("00111111")), 0, 0},
84+
{b(bb("00111111")), 1, 1},
85+
{b(bb("00111111")), 2, 2},
86+
{b(bb("00111111")), 4, 4},
87+
{b(bb("00111100")), 2, 0},
88+
{b(bb("00111101")), 4, 3},
89+
{b(bb("00111001")), 4, 2},
90+
{b(bb("00111000")), 4, 1},
91+
{b(bb("00110000")), 4, 0},
92+
{b(bb("00000000")), 0, 0},
93+
{b(bb("00000000")), 1, 0},
94+
{b(bb("00000000")), 2, 0},
95+
{b(bb("00000000")), 3, 0},
96+
{[]byte{0x0, 0x0, 0x0}, 20, 0},
97+
{[]byte{0xff, 0xff, 0xff}, 5, 5},
98+
{[]byte{0xff, 0xff, 0xff}, 7, 7},
99+
{[]byte{0xff, 0xff, 0xff}, 8, 8},
100+
{[]byte{0xff, 0xff, 0xff}, 10, 10},
101+
{[]byte{0xff, 0xff, 0xff}, 20, 20},
102+
}
103+
104+
for _, c := range cases {
105+
bm := NewBitmapFrom(rev(c.bytes))
106+
if bm.Index(c.pos) != c.expected {
107+
t.Fatalf("bitmap %v Index(%v) should be %v", c.bytes, c.pos, c.expected)
108+
}
109+
}
110+
}
111+
112+
func TestBitmap_32bitFixed(t *testing.T) {
113+
// a 32-byte bitmap and a list of all the bits that are set
114+
byts := []byte{
115+
bb("00100101"), bb("10000000"), bb("00000000"), bb("01000000"),
116+
bb("00000000"), bb("01000000"), bb("00000000"), bb("01000000"),
117+
bb("00000000"), bb("00100000"), bb("00000000"), bb("01000000"),
118+
bb("00000000"), bb("00010000"), bb("00000000"), bb("01000000"),
119+
bb("00000000"), bb("00001000"), bb("00000000"), bb("01000000"),
120+
bb("00000000"), bb("00000100"), bb("00000000"), bb("01000000"),
121+
bb("00000000"), bb("00000010"), bb("00000000"), bb("01000000"),
122+
bb("00000000"), bb("00000001"), bb("00000000"), bb("01000000"),
123+
}
124+
bm := NewBitmapFrom(rev(byts))
125+
set := []int{
126+
0, 2, 5, 8 + 7, 8*3 + 6,
127+
8*5 + 6, 8*7 + 6,
128+
8*9 + 5, 8*11 + 6,
129+
8*13 + 4, 8*15 + 6,
130+
8*17 + 3, 8*19 + 6,
131+
8*21 + 2, 8*23 + 6,
132+
8*25 + 1, 8*27 + 6,
133+
8 * 29, 8*31 + 6}
134+
135+
c := 0
136+
for i := 0; i < 256; i++ {
137+
if c < len(set) && i == set[c] {
138+
if !bm.IsSet(i) {
139+
t.Fatalf("IsSet(%v) should be true", i)
140+
}
141+
// the index c of `set` also gives us the translation of Index(i)
142+
if bm.Index(i) != c {
143+
t.Fatalf("Index(%v) should be %v", i, c)
144+
}
145+
c++
146+
} else {
147+
if bm.IsSet(i) {
148+
t.Fatalf("IsSet(%v) should be false", i)
149+
}
150+
}
151+
}
152+
}
153+
154+
func TestBitmapSetBytes(t *testing.T) {
155+
newSet := func(bitWidth int, ba []byte, index int, set bool) []byte {
156+
var bm *Bitmap
157+
if ba != nil {
158+
bm = NewBitmapFrom(ba)
159+
} else {
160+
bm = NewBitmap(bitWidth)
161+
}
162+
bm.Set(index, set)
163+
return bm.Bytes
164+
}
165+
166+
if !bytes.Equal(newSet(3, nil, 0, true), rev([]byte{bb("00000001")})) {
167+
t.Fatal("Failed bytes comparison")
168+
}
169+
if !bytes.Equal(newSet(3, nil, 1, true), rev([]byte{bb("00000010")})) {
170+
t.Fatal("Failed bytes comparison")
171+
}
172+
if !bytes.Equal(newSet(3, nil, 7, true), rev([]byte{bb("10000000")})) {
173+
t.Fatal("Failed bytes comparison")
174+
}
175+
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 0, true), rev([]byte{bb("11111111")})) {
176+
t.Fatal("Failed bytes comparison")
177+
}
178+
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 7, true), rev([]byte{bb("11111111")})) {
179+
t.Fatal("Failed bytes comparison")
180+
}
181+
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 1, true), rev([]byte{bb("01010111")})) {
182+
t.Fatal("Failed bytes comparison")
183+
}
184+
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 7, true), rev([]byte{bb("11010101")})) {
185+
t.Fatal("Failed bytes comparison")
186+
}
187+
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 0, false), rev([]byte{bb("11111110")})) {
188+
t.Fatal("Failed bytes comparison")
189+
}
190+
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 1, false), rev([]byte{bb("11111101")})) {
191+
t.Fatal("Failed bytes comparison")
192+
}
193+
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 7, false), rev([]byte{bb("01111111")})) {
194+
t.Fatal("Failed bytes comparison")
195+
}
196+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+0, true), rev([]byte{0, bb("11111111")})) {
197+
t.Fatal("Failed bytes comparison")
198+
}
199+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+7, true), rev([]byte{0, bb("11111111")})) {
200+
t.Fatal("Failed bytes comparison")
201+
}
202+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("01010101")}), 8+1, true), rev([]byte{0, bb("01010111")})) {
203+
t.Fatal("Failed bytes comparison")
204+
}
205+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("01010101")}), 8+7, true), rev([]byte{0, bb("11010101")})) {
206+
t.Fatal("Failed bytes comparison")
207+
}
208+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+0, false), rev([]byte{0, bb("11111110")})) {
209+
t.Fatal("Failed bytes comparison")
210+
}
211+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+1, false), rev([]byte{0, bb("11111101")})) {
212+
t.Fatal("Failed bytes comparison")
213+
}
214+
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+7, false), rev([]byte{0, bb("01111111")})) {
215+
t.Fatal("Failed bytes comparison")
216+
}
217+
if !bytes.Equal(newSet(0, rev([]byte{0}), 0, false), rev([]byte{bb("00000000")})) {
218+
t.Fatal("Failed bytes comparison")
219+
}
220+
if !bytes.Equal(newSet(0, rev([]byte{0}), 7, false), rev([]byte{bb("00000000")})) {
221+
t.Fatal("Failed bytes comparison")
222+
}
223+
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 0, false), rev([]byte{bb("01010100")})) {
224+
t.Fatal("Failed bytes comparison")
225+
}
226+
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 6, false), rev([]byte{bb("00010101")})) {
227+
t.Fatal("Failed bytes comparison")
228+
}
229+
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 0, false), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")})) {
230+
t.Fatal("Failed bytes comparison")
231+
}
232+
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 0, true), rev([]byte{bb("11000011"), bb("11010010"), bb("01001010"), bb("00000001")})) {
233+
t.Fatal("Failed bytes comparison")
234+
}
235+
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 12, false), rev([]byte{bb("11000010"), bb("11000010"), bb("01001010"), bb("00000001")})) {
236+
t.Fatal("Failed bytes comparison")
237+
}
238+
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 12, true), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")})) {
239+
t.Fatal("Failed bytes comparison")
240+
}
241+
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 24, false), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000000")})) {
242+
t.Fatal("Failed bytes comparison")
243+
}
244+
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 24, true), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")})) {
245+
t.Fatal("Failed bytes comparison")
246+
}
247+
if !bytes.Equal(newSet(0, rev([]byte{0, 0, 0, 0}), 31, true), rev([]byte{0, 0, 0, bb("10000000")})) {
248+
t.Fatal("Failed bytes comparison")
249+
}
250+
}

0 commit comments

Comments
 (0)