Skip to content

Commit 091854c

Browse files
committed
BREAKING CHANGE: introduce Bitmap for handling bitfield operations
Closes: https://github.com/ipfs/go-hamt-ipld/issues/54
1 parent 7597825 commit 091854c

File tree

7 files changed

+482
-156
lines changed

7 files changed

+482
-156
lines changed

Diff for: bitmap.go

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package hamt
2+
3+
import (
4+
"fmt"
5+
"math"
6+
"math/bits"
7+
)
8+
9+
// Bitmap records, one bit per slot, which elements of an associated compacted
// array are present. Bits can be set and cleared individually, and Index
// performs a partial popcount that turns a bit position into an offset in the
// compacted array.
type Bitmap struct {
	// Bytes is the raw backing storage. Bit positions are addressed from the
	// last byte backward: position 0 is the least significant bit of the
	// final byte.
	Bytes []byte
}
16+
17+
// NewBitmap creates a new bitmap for a given bitWidth. The bitmap will hold
18+
// 2^bitWidth bytes.
19+
func NewBitmap(bitWidth int) *Bitmap {
20+
bc := (1 << uint(bitWidth)) / 8
21+
if bc == 0 {
22+
panic("bitWidth too small")
23+
}
24+
25+
return NewBitmapFrom(make([]byte, bc))
26+
}
27+
28+
// NewBitmapFrom creates a new Bitmap from an existing byte array. It is
29+
// assumed that bytes is the correct length for the bitWidth of this Bitmap.
30+
func NewBitmapFrom(bytes []byte) *Bitmap {
31+
if len(bytes) == 0 {
32+
panic("can't form Bitmap from zero bytes")
33+
}
34+
bm := Bitmap{Bytes: bytes}
35+
return &bm
36+
}
37+
38+
// BitWidth calculates the bitWidth of this Bitmap by performing a
39+
// log2(bits). The bitWidth is the minimum number of bits required to
40+
// form indexes that address all of this Bitmap. e.g. a bitWidth of 5 can form
41+
// indexes of 0 to 31, i.e. 4 bytes.
42+
func (bm *Bitmap) BitWidth() int {
43+
return int(math.Log2(float64(len(bm.Bytes) * 8)))
44+
}
45+
46+
func (bm *Bitmap) bindex(in int) int {
47+
// Return `in` to flip the byte addressing order to LE. For BE we address
48+
// from the last byte backward.
49+
bi := len(bm.Bytes) - 1 - in
50+
if bi > len(bm.Bytes) || bi < 0 {
51+
panic(fmt.Sprintf("invalid index for this Bitmap (index: %v, bytes: %v)", in, len(bm.Bytes)))
52+
}
53+
return bi
54+
}
55+
56+
// IsSet indicates whether the bit at the provided position is set or not.
57+
func (bm *Bitmap) IsSet(position int) bool {
58+
byt := bm.bindex(position / 8)
59+
offset := position % 8
60+
return (bm.Bytes[byt]>>offset)&1 == 1
61+
}
62+
63+
// Set sets or unsets the bit at the given position according. If set is true,
64+
// the bit will be set. If set is false, the bit will be unset. Returns a
65+
// reference to this Bitmap.
66+
func (bm *Bitmap) Set(position int, set bool) *Bitmap {
67+
has := bm.IsSet(position)
68+
byt := bm.bindex(position / 8)
69+
offset := position % 8
70+
71+
if set && !has {
72+
bm.Bytes[byt] |= 1 << offset
73+
} else if !set && has {
74+
bm.Bytes[byt] ^= 1 << offset
75+
}
76+
77+
return bm
78+
}
79+
80+
// Index performs a limited popcount up to the given position. This calculates
81+
// the number of set bits up to the index of the bitmap. Useful for calculating
82+
// the position of an element in an associated compacted array.
83+
func (bm *Bitmap) Index(position int) int {
84+
t := 0
85+
eb := position / 8
86+
byt := 0
87+
for ; byt < eb; byt++ {
88+
// quick popcount for the full bytes
89+
t += bits.OnesCount(uint(bm.Bytes[bm.bindex(byt)]))
90+
}
91+
eb = eb * 8
92+
if position > eb {
93+
for i := byt * 8; i < position; i++ {
94+
// manual per-bit check for the remainder <8 bits
95+
if bm.IsSet(i) {
96+
t++
97+
}
98+
}
99+
}
100+
return t
101+
}
102+
103+
// Copy creates a clone of the Bitmap, creating a new byte array with the same
104+
// contents as the original.
105+
func (bm *Bitmap) Copy() *Bitmap {
106+
ba := make([]byte, len(bm.Bytes))
107+
copy(ba, bm.Bytes)
108+
return NewBitmapFrom(ba)
109+
}
110+
111+
// BitsSetCount counts how many bits are set in the bitmap.
112+
func (bm *Bitmap) BitsSetCount() int {
113+
count := 0
114+
for _, b := range bm.Bytes {
115+
count += bits.OnesCount(uint(b))
116+
}
117+
return count
118+
}

Diff for: bitmap_test.go

+230
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
package hamt
2+
3+
import (
4+
"bytes"
5+
"testing"
6+
)
7+
8+
// many cases taken from https://github.com/rvagg/iamap/blob/fad95295b013c8b4f0faac6dd5d9be175f6e606c/test/bit-utils-test.js
9+
// but rev() is used to reverse the data in most instances
10+
11+
// rev returns a new slice holding the bytes of in in reverse order, leaving in
// untouched. The test fixtures are written least-significant byte first and
// are reversed to match the Bitmap's big-endian byte addressing.
func rev(in []byte) []byte {
	n := len(in)
	out := make([]byte, n)
	for i, v := range in {
		out[n-1-i] = v
	}
	return out
}
19+
20+
func TestBitmapHas(t *testing.T) {
21+
type tcase struct {
22+
bytes []byte
23+
pos int
24+
set bool
25+
}
26+
cases := []tcase{
27+
{b(0b0), 0, false},
28+
{b(0b1), 0, true},
29+
{b(0b101010), 2, false},
30+
{b(0b101010), 3, true},
31+
{b(0b101010), 4, false},
32+
{b(0b101010), 5, true},
33+
{b(0b100000), 5, true},
34+
{b(0b0100000), 5, true},
35+
{b(0b00100000), 5, true},
36+
{[]byte{0x0, 0b00100000}, 8 + 5, true},
37+
{[]byte{0x0, 0x0, 0b00100000}, 8*2 + 5, true},
38+
{[]byte{0x0, 0x0, 0x0, 0b00100000}, 8*3 + 5, true},
39+
{[]byte{0x0, 0x0, 0x0, 0x0, 0b00100000}, 8*4 + 5, true},
40+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0b00100000}, 8*5 + 5, true},
41+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0b00100000}, 8*4 + 5, false},
42+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0b00100000}, 8*3 + 5, false},
43+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0b00100000}, 8*2 + 5, false},
44+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0b00100000}, 8 + 5, false},
45+
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, 0b00100000}, 5, false},
46+
}
47+
48+
for _, c := range cases {
49+
bm := NewBitmapFrom(rev(c.bytes))
50+
if bm.IsSet(c.pos) != c.set {
51+
t.Fatalf("bitmap %v IsSet(%v) should be %v", c.bytes, c.pos, c.set)
52+
}
53+
}
54+
}
55+
56+
func TestBitmapBitWidth(t *testing.T) {
57+
for i := 3; i <= 16; i++ {
58+
if NewBitmap(i).BitWidth() != i {
59+
t.Fatal("incorrect bitWidth calculation")
60+
}
61+
if NewBitmapFrom(make([]byte, (1<<i)/8)).BitWidth() != i {
62+
t.Fatal("incorrect bitWidth calculation")
63+
}
64+
}
65+
}
66+
67+
func TestBitmapIndex(t *testing.T) {
68+
type tcase struct {
69+
bytes []byte
70+
pos int
71+
expected int
72+
}
73+
cases := []tcase{
74+
{b(0b111111), 0, 0},
75+
{b(0b111111), 1, 1},
76+
{b(0b111111), 2, 2},
77+
{b(0b111111), 4, 4},
78+
{b(0b111100), 2, 0},
79+
{b(0b111101), 4, 3},
80+
{b(0b111001), 4, 2},
81+
{b(0b111000), 4, 1},
82+
{b(0b110000), 4, 0},
83+
{b(0b000000), 0, 0},
84+
{b(0b000000), 1, 0},
85+
{b(0b000000), 2, 0},
86+
{b(0b000000), 3, 0},
87+
{[]byte{0x0, 0x0, 0x0}, 20, 0},
88+
{[]byte{0xff, 0xff, 0xff}, 5, 5},
89+
{[]byte{0xff, 0xff, 0xff}, 7, 7},
90+
{[]byte{0xff, 0xff, 0xff}, 8, 8},
91+
{[]byte{0xff, 0xff, 0xff}, 10, 10},
92+
{[]byte{0xff, 0xff, 0xff}, 20, 20},
93+
}
94+
95+
for _, c := range cases {
96+
bm := NewBitmapFrom(rev(c.bytes))
97+
if bm.Index(c.pos) != c.expected {
98+
t.Fatalf("bitmap %v Index(%v) should be %v", c.bytes, c.pos, c.expected)
99+
}
100+
}
101+
}
102+
103+
func TestBitmap_32bitFixed(t *testing.T) {
104+
// a 32-byte bitmap and a list of all the bits that are set
105+
byts := []byte{
106+
0b00100101, 0b10000000, 0b00000000, 0b01000000,
107+
0b00000000, 0b01000000, 0b00000000, 0b01000000,
108+
0b00000000, 0b00100000, 0b00000000, 0b01000000,
109+
0b00000000, 0b00010000, 0b00000000, 0b01000000,
110+
0b00000000, 0b00001000, 0b00000000, 0b01000000,
111+
0b00000000, 0b00000100, 0b00000000, 0b01000000,
112+
0b00000000, 0b00000010, 0b00000000, 0b01000000,
113+
0b00000000, 0b00000001, 0b00000000, 0b01000000,
114+
}
115+
bm := NewBitmapFrom(rev(byts))
116+
set := []int{
117+
0, 2, 5, 8 + 7, 8*3 + 6,
118+
8*5 + 6, 8*7 + 6,
119+
8*9 + 5, 8*11 + 6,
120+
8*13 + 4, 8*15 + 6,
121+
8*17 + 3, 8*19 + 6,
122+
8*21 + 2, 8*23 + 6,
123+
8*25 + 1, 8*27 + 6,
124+
8 * 29, 8*31 + 6}
125+
126+
c := 0
127+
for i := 0; i < 256; i++ {
128+
if c < len(set) && i == set[c] {
129+
if !bm.IsSet(i) {
130+
t.Fatalf("IsSet(%v) should be true", i)
131+
}
132+
// the index c of `set` also gives us the translation of Index(i)
133+
if bm.Index(i) != c {
134+
t.Fatalf("Index(%v) should be %v", i, c)
135+
}
136+
c++
137+
} else {
138+
if bm.IsSet(i) {
139+
t.Fatalf("IsSet(%v) should be false", i)
140+
}
141+
}
142+
}
143+
}
144+
145+
func TestBitmapSetBytes(t *testing.T) {
146+
if !bytes.Equal(NewBitmap(3).Set(0, true).Bytes, rev([]byte{0b00000001})) {
147+
t.Fatal("Failed bytes comparison")
148+
}
149+
if !bytes.Equal(NewBitmap(3).Set(1, true).Bytes, rev([]byte{0b00000010})) {
150+
t.Fatal("Failed bytes comparison")
151+
}
152+
if !bytes.Equal(NewBitmap(3).Set(7, true).Bytes, rev([]byte{0b10000000})) {
153+
t.Fatal("Failed bytes comparison")
154+
}
155+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11111111})).Set(0, true).Bytes, rev([]byte{0b11111111})) {
156+
t.Fatal("Failed bytes comparison")
157+
}
158+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11111111})).Set(7, true).Bytes, rev([]byte{0b11111111})) {
159+
t.Fatal("Failed bytes comparison")
160+
}
161+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b01010101})).Set(1, true).Bytes, rev([]byte{0b01010111})) {
162+
t.Fatal("Failed bytes comparison")
163+
}
164+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b01010101})).Set(7, true).Bytes, rev([]byte{0b11010101})) {
165+
t.Fatal("Failed bytes comparison")
166+
}
167+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11111111})).Set(0, false).Bytes, rev([]byte{0b11111110})) {
168+
t.Fatal("Failed bytes comparison")
169+
}
170+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11111111})).Set(1, false).Bytes, rev([]byte{0b11111101})) {
171+
t.Fatal("Failed bytes comparison")
172+
}
173+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11111111})).Set(7, false).Bytes, rev([]byte{0b01111111})) {
174+
t.Fatal("Failed bytes comparison")
175+
}
176+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b11111111})).Set(8+0, true).Bytes, rev([]byte{0, 0b11111111})) {
177+
t.Fatal("Failed bytes comparison")
178+
}
179+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b11111111})).Set(8+7, true).Bytes, rev([]byte{0, 0b11111111})) {
180+
t.Fatal("Failed bytes comparison")
181+
}
182+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b01010101})).Set(8+1, true).Bytes, rev([]byte{0, 0b01010111})) {
183+
t.Fatal("Failed bytes comparison")
184+
}
185+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b01010101})).Set(8+7, true).Bytes, rev([]byte{0, 0b11010101})) {
186+
t.Fatal("Failed bytes comparison")
187+
}
188+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b11111111})).Set(8+0, false).Bytes, rev([]byte{0, 0b11111110})) {
189+
t.Fatal("Failed bytes comparison")
190+
}
191+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b11111111})).Set(8+1, false).Bytes, rev([]byte{0, 0b11111101})) {
192+
t.Fatal("Failed bytes comparison")
193+
}
194+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0b11111111})).Set(8+7, false).Bytes, rev([]byte{0, 0b01111111})) {
195+
t.Fatal("Failed bytes comparison")
196+
}
197+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0})).Set(0, false).Bytes, rev([]byte{0b00000000})) {
198+
t.Fatal("Failed bytes comparison")
199+
}
200+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0})).Set(7, false).Bytes, rev([]byte{0b00000000})) {
201+
t.Fatal("Failed bytes comparison")
202+
}
203+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b01010101})).Set(0, false).Bytes, rev([]byte{0b01010100})) {
204+
t.Fatal("Failed bytes comparison")
205+
}
206+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b01010101})).Set(6, false).Bytes, rev([]byte{0b00010101})) {
207+
t.Fatal("Failed bytes comparison")
208+
}
209+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})).Set(0, false).Bytes, rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})) {
210+
t.Fatal("Failed bytes comparison")
211+
}
212+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})).Set(0, true).Bytes, rev([]byte{0b11000011, 0b11010010, 0b01001010, 0b0000001})) {
213+
t.Fatal("Failed bytes comparison")
214+
}
215+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})).Set(12, false).Bytes, rev([]byte{0b11000010, 0b11000010, 0b01001010, 0b0000001})) {
216+
t.Fatal("Failed bytes comparison")
217+
}
218+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})).Set(12, true).Bytes, rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})) {
219+
t.Fatal("Failed bytes comparison")
220+
}
221+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})).Set(24, false).Bytes, rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000000})) {
222+
t.Fatal("Failed bytes comparison")
223+
}
224+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})).Set(24, true).Bytes, rev([]byte{0b11000010, 0b11010010, 0b01001010, 0b0000001})) {
225+
t.Fatal("Failed bytes comparison")
226+
}
227+
if !bytes.Equal(NewBitmapFrom(rev([]byte{0, 0, 0, 0})).Set(31, true).Bytes, rev([]byte{0, 0, 0, 0b10000000})) {
228+
t.Fatal("Failed bytes comparison")
229+
}
230+
}

Diff for: cbor_gen.go

+6-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)