-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[fix] use less memory / speed-up parsing (#5)
* [fix] use less memory / speed-up parsing * [clean] * [parser] lazy approach * [review] * [fix] * [review] * [review] * [review] * [review] * [review]
- Loading branch information
1 parent
42648ea
commit 8852bd3
Showing
15 changed files
with
1,307 additions
and
161 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
.idea/ | ||
vendor/ | ||
vendor/ | ||
*.pprof | ||
coverage.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
package iabtcf | ||
|
||
import ( | ||
"fmt" | ||
"time" | ||
) | ||
|
||
// ////////////////////////////////////////////////// | ||
// bits | ||
|
||
// Bits represents a bitset with some helpers to read int, bool, string and time fields. | ||
// | ||
// Bits are stored in a byte slice: the first byte stores the first 8 bits, | ||
// the second byte the next 8 bits, and so on. | ||
// Within a byte, the first bit is the most significant one. | ||
// | ||
// note: the last byte may contain fewer than 8 bits. Those bits are left aligned, | ||
// i.e. they occupy the most significant positions and the remaining positions are zero padding. | ||
type Bits []byte | ||
|
||
// HasBit checks if the bit number is set | ||
// | ||
// note: number is not the index and it starts at 1. | ||
func (b Bits) HasBit(number int) bool { | ||
return b.ReadBoolField(number - 1) | ||
} | ||
|
||
// Length returns the number of bits in the bitset | ||
func (b Bits) Length() int { | ||
return len(b) * nbBitInByte | ||
} | ||
|
||
const (
	// nbBitInByte is the number of bits stored in each byte of a Bits value.
	nbBitInByte = 8
	// lastBitIndex is the index of the last bit of a byte (index 0 being the first, most significant bit).
	lastBitIndex = nbBitInByte - 1
)

// bitMasks maps a bit index inside a byte to the mask selecting that bit.
// Index 0 selects the most significant bit, index 7 the least significant one.
var bitMasks = [nbBitInByte]byte{
	0x80,
	0x40,
	0x20,
	0x10,
	0x08,
	0x04,
	0x02,
	0x01,
}
|
||
// ReadInt64Field reads an int64 field of nbBits bits starting at offset | ||
// | ||
// note: if offset is negative, the result will be zero | ||
// note: if offset + nbBits is out of bound, the result will be the same if we were adding trailing zeros | ||
// example: 00101 > read with offset 2 and nbBits 5 > equivalent to reading 10100 = 20 | ||
func (b Bits) ReadInt64Field(offset, nbBits int) int64 { | ||
if offset < 0 { | ||
return 0 | ||
} | ||
var result int64 | ||
byteIndex := offset / nbBitInByte | ||
if byteIndex >= len(b) { | ||
return result | ||
} | ||
bitIndex := offset % nbBitInByte | ||
for i := 0; i < nbBits; i++ { | ||
mask := bitMasks[bitIndex] | ||
if b[byteIndex]&mask == mask { | ||
result |= 1 << (nbBits - 1 - i) | ||
} | ||
if bitIndex == lastBitIndex { | ||
byteIndex++ | ||
if byteIndex >= len(b) { | ||
return result | ||
} | ||
bitIndex = 0 | ||
} else { | ||
bitIndex++ | ||
} | ||
} | ||
return result | ||
} | ||
|
||
// ReadIntField reads an int field of nbBits bits starting at offset | ||
func (b *Bits) ReadIntField(offset, nbBits int) int { | ||
return int(b.ReadInt64Field(offset, nbBits)) | ||
} | ||
|
||
const ( | ||
timeNbBits = 36 | ||
) | ||
|
||
// ReadTimeField reads a time field of 36 bits starting at offset | ||
func (b *Bits) ReadTimeField(offset int) time.Time { | ||
ds := b.ReadInt64Field(offset, timeNbBits) | ||
return time.Unix(ds/dsPerSec, (ds%dsPerSec)*nsPerDs).UTC() | ||
} | ||
|
||
const ( | ||
characterNbBits = 6 | ||
) | ||
|
||
// ReadStringField reads a string field of nbBits bits starting at offset | ||
// | ||
// note: each character is represented by 6 bits, so the number of bits must be a multiple of 6 | ||
// note: the characters are represented by the uppercase alphabet starting from 'A' | ||
func (b *Bits) ReadStringField(offset, nbBits int) string { | ||
length := nbBits / characterNbBits | ||
var buf = make([]byte, 0, length) | ||
nextOffset := offset | ||
for i := 0; i < length; i++ { | ||
value := b.ReadInt64Field(nextOffset, characterNbBits) | ||
buf = append(buf, byte(value)+'A') | ||
nextOffset += characterNbBits | ||
} | ||
return string(buf) | ||
} | ||
|
||
const ( | ||
boolNbBits = 1 | ||
) | ||
|
||
// ReadBoolField reads a bool field of 1 bit starting at offset | ||
func (b *Bits) ReadBoolField(offset int) bool { | ||
return b.ReadInt64Field(offset, boolNbBits) == 1 | ||
} | ||
|
||
// ToBitString returns the bitset as a string of bits ( human readable 0s and 1s ) | ||
func (bits Bits) ToBitString() string { | ||
if bits == nil { | ||
return "" | ||
} | ||
|
||
result := "" | ||
|
||
for i, b := range bits { | ||
if i != 0 { | ||
result += " " | ||
} | ||
result += fmt.Sprintf("%08b", b) | ||
} | ||
|
||
return result | ||
} | ||
|
||
// ////////////////////////////////////////////////// | ||
// bit string helper | ||
|
||
// BitStringToBits converts a bit string to a Bits struct | ||
func BitStringToBits(value string) Bits { | ||
return Bits(BitStringToBytes(value)) | ||
} | ||
|
||
// BitStringToBytes converts a bit string to a byte slice | ||
func BitStringToBytes(value string) []byte { | ||
bytes := make([]byte, 0, len(value)/nbBitInByte) | ||
|
||
position := lastBitIndex | ||
var lastByte byte | ||
for i := 0; i < len(value); i++ { | ||
if value[i] == ' ' { | ||
continue | ||
} | ||
if value[i] == '1' { | ||
lastByte |= 1 << position | ||
} | ||
if position == 0 { | ||
position = lastBitIndex | ||
bytes = append(bytes, lastByte) | ||
lastByte = 0 | ||
} else { | ||
position-- | ||
} | ||
} | ||
if position != nbBitInByte-1 { | ||
bytes = append(bytes, lastByte) | ||
} | ||
return bytes | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
package iabtcf | ||
|
||
import ( | ||
"encoding/base64" | ||
"fmt" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestBits(t *testing.T) { | ||
|
||
wantHasBit := func(number int, expected []int) bool { | ||
for _, e := range expected { | ||
if number == e { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
type TestCase struct { | ||
Base64 string | ||
WantBitString string | ||
WantHasBit []int | ||
} | ||
|
||
values := map[string]*TestCase{ | ||
"101": { | ||
Base64: "oA", | ||
WantBitString: "10100000", | ||
WantHasBit: []int{1, 3}, | ||
}, | ||
"00000001": { | ||
Base64: "AQ", | ||
WantBitString: "00000001", | ||
WantHasBit: []int{8}, | ||
}, | ||
"00000101": { | ||
Base64: "BQ", | ||
WantBitString: "00000101", | ||
WantHasBit: []int{6, 8}, | ||
}, | ||
"10000101": { | ||
Base64: "hQ", | ||
WantBitString: "10000101", | ||
WantHasBit: []int{1, 6, 8}, | ||
}, | ||
"00000001 00000101": { | ||
Base64: "AQU", | ||
WantBitString: "00000001 00000101", | ||
WantHasBit: []int{8, 14, 16}, | ||
}, | ||
"00000001 101": { | ||
Base64: "AaA", | ||
WantBitString: "00000001 10100000", | ||
WantHasBit: []int{8, 9, 11}, | ||
}, | ||
"00000001 00000000": { | ||
Base64: "AQA", | ||
WantBitString: "00000001 00000000", | ||
WantHasBit: []int{8}, | ||
}, | ||
"00000001 00000000 1": { | ||
Base64: "AQCA", | ||
WantBitString: "00000001 00000000 10000000", | ||
WantHasBit: []int{8, 17}, | ||
}, | ||
"00000001 0000001": { | ||
Base64: "AQI", | ||
WantBitString: "00000001 00000010", | ||
WantHasBit: []int{8, 15}, | ||
}, | ||
} | ||
|
||
for bitString, tc := range values { | ||
t.Run(bitString, func(t *testing.T) { | ||
t.Helper() | ||
fmt.Printf("\n[test] ---------- %s ---------- \n", bitString) | ||
var wantBytes, err = base64.RawURLEncoding.DecodeString(tc.Base64) | ||
fmt.Printf("[test] base64: %s >>> bytes: %v \n", tc.Base64, wantBytes) | ||
require.NoError(t, err, "unexpected base64 error") | ||
|
||
gotBits := BitStringToBits(bitString) | ||
fmt.Printf("[test] bits: %s >>> bytes: %v \n", bitString, gotBits) | ||
require.Equal(t, wantBytes, []byte(gotBits)) | ||
|
||
fmt.Printf("[test] Bits: %v \n", gotBits) | ||
|
||
fmt.Printf("[test] bytes: %v >>> bits: %s \n", gotBits, gotBits.ToBitString()) | ||
require.Equal(t, tc.WantBitString, gotBits.ToBitString()) | ||
|
||
fmt.Printf("[test] WantHasBit: %v \n", tc.WantHasBit) | ||
length := len(strings.ReplaceAll(bitString, " ", "")) | ||
for number := 1; number <= length; number++ { | ||
gotHasBit := gotBits.HasBit(number) | ||
wantHasBit := wantHasBit(number, tc.WantHasBit) | ||
require.Equal(t, wantHasBit, gotHasBit) | ||
} | ||
}) | ||
} | ||
} |
Oops, something went wrong.