Skip to content

Commit

Permalink
[fix] use less memory / speed-up parsing (#5)
Browse files Browse the repository at this point in the history
* [fix] use less memory / speed-up parsing

* [clean]

* [parser] lazy approach

* [review]

* [fix]

* [review]

* [review]

* [review]

* [review]

* [review]
  • Loading branch information
gvaligiani authored Jun 19, 2024
1 parent 42648ea commit 8852bd3
Show file tree
Hide file tree
Showing 15 changed files with 1,307 additions and 161 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.idea/
vendor/
vendor/
*.pprof
coverage.*
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Library provides convenient way to check if:

go get -v github.com/travelaudience/go-iabtcf

### Example
### Example - Normal Parsing

package main

Expand All @@ -34,6 +34,27 @@ Library provides convenient way to check if:
sf := s.EverySpecialFeatureAllowed([]int{1})
va := s.VendorAllowed(1)
}

### Example - Lazy Parsing

package main

import (
"fmt"

"github.com/travelaudience/go-iabtcf"
)

func main() {
	// LazyParseCoreString parses the consent string lazily and returns a value
	// that can be queried for purposes, special features and vendors.
	var s, err = iabtcf.LazyParseCoreString("COwIsAvOwIsAvBIAAAENAPCMAP_AAP_AAAAAFoQBQABAAGAAQAAwACQAAAAA.IFoEUQQgAIQwgIwQABAEAAAAOIAACAIAAAAQAIAgEAACEAAAAAgAQBAAAAAAAGBAAgAAAAAAAFAAECAAAgAAQARAEQAAAAAJAAIAAgAAAYQEAAAQmAgBC3ZAYzUw")
	if err != nil {
		panic(err)
	}
	pa := s.EveryPurposeAllowed([]int{1})
	sf := s.EverySpecialFeatureAllowed([]int{1})
	va := s.VendorAllowed(1)
	// Print the results: Go refuses to compile unused variables and unused
	// imports, so the example must actually consume pa, sf, va and fmt.
	fmt.Println(pa, sf, va)
}

## Contributing

Expand Down
177 changes: 177 additions & 0 deletions bits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
package iabtcf

import (
"fmt"
"time"
)

// //////////////////////////////////////////////////
// bits

// Bits is a bitset backed by a byte slice, with helpers to read int, bool,
// string and time fields at arbitrary bit offsets.
//
// Bits are stored most significant bit first: bit index 0 is the highest bit
// of the first byte, bit index 8 is the highest bit of the second byte, and
// so on.
//
// note: the last byte may carry fewer than 8 meaningful bits. Those bits are
// left aligned, i.e. they occupy the high-order positions of that byte (when
// the value is built with BitStringToBytes the unused low-order bits are zero).
type Bits []byte

// HasBit reports whether the bit with the given number is set.
//
// note: number is a 1-based position, not a 0-based index: HasBit(1) looks at
// the very first bit of the bitset.
func (b Bits) HasBit(number int) bool {
	// translate the 1-based number into the 0-based offset used by the readers
	index := number - 1
	return b.ReadBoolField(index)
}

// Length returns the number of bits held by the bitset, i.e. 8 bits for every
// stored byte.
//
// note: the padding bits of a partially filled last byte are counted as well.
func (b Bits) Length() int {
	nbBytes := len(b)
	return nbBytes * nbBitInByte
}

const (
	// nbBitInByte is the number of bits stored in one byte of a bitset.
	nbBitInByte = 8
	// lastBitIndex is the index of the last (least significant) bit of a byte.
	lastBitIndex = nbBitInByte - 1
)

// bitMasks maps a bit index inside a byte to the mask selecting that bit.
//
// Index 0 selects the most significant bit, index 7 the least significant one.
var bitMasks = [nbBitInByte]byte{
	0x80, // 1000 0000
	0x40, // 0100 0000
	0x20, // 0010 0000
	0x10, // 0001 0000
	0x08, // 0000 1000
	0x04, // 0000 0100
	0x02, // 0000 0010
	0x01, // 0000 0001
}

// ReadInt64Field reads nbBits bits starting at bit offset and returns them as
// an int64, the first bit read being the most significant one.
//
// note: a negative offset yields zero
// note: bits located past the end of the bitset are read as zeros, as if the
// bitset was padded with trailing zeros
// example: 00101 > read with offset 2 and nbBits 5 > same as reading 10100 = 20
func (b Bits) ReadInt64Field(offset, nbBits int) int64 {
	if offset < 0 {
		return 0
	}
	var value int64
	// pos is the absolute index of the bit read at iteration i
	for i, pos := 0, offset; i < nbBits; i, pos = i+1, pos+1 {
		byteIdx := pos / nbBitInByte
		if byteIdx >= len(b) {
			// everything left is implicit zero padding
			break
		}
		if b[byteIdx]&bitMasks[pos%nbBitInByte] != 0 {
			// the i-th bit read lands at weight nbBits-1-i of the result
			value |= 1 << (nbBits - 1 - i)
		}
	}
	return value
}

// ReadIntField reads an int field of nbBits bits starting at offset.
//
// It delegates to ReadInt64Field and converts the result with int(), so the
// same offset / out-of-bound rules apply.
//
// note: the receiver is a value, like HasBit, Length and ReadInt64Field, so the
// method can also be called on non-addressable Bits values (for example the
// direct result of BitStringToBits); calls through a *Bits keep working.
func (b Bits) ReadIntField(offset, nbBits int) int {
	return int(b.ReadInt64Field(offset, nbBits))
}

const (
	// timeNbBits is the width, in bits, of a time field.
	timeNbBits = 36
)

// ReadTimeField reads a time field of 36 bits starting at offset and returns
// it as a UTC time.
//
// The raw value is split into whole seconds (value / dsPerSec) and a remainder
// converted to nanoseconds (remainder * nsPerDs) before being handed to
// time.Unix.
// NOTE(review): dsPerSec and nsPerDs are declared outside this file; their
// names suggest the field counts deciseconds since the Unix epoch - confirm.
//
// note: the receiver is a value, like HasBit, Length and ReadInt64Field, so the
// method can also be called on non-addressable Bits values; calls through a
// *Bits keep working.
func (b Bits) ReadTimeField(offset int) time.Time {
	ds := b.ReadInt64Field(offset, timeNbBits)
	return time.Unix(ds/dsPerSec, (ds%dsPerSec)*nsPerDs).UTC()
}

const (
	// characterNbBits is the width, in bits, of one encoded character.
	characterNbBits = 6
)

// ReadStringField reads a string field of nbBits bits starting at offset.
//
// note: each character is represented by 6 bits, so the number of bits should
// be a multiple of 6; extra bits that do not form a full character are ignored
// note: each 6-bit value is added to 'A', so 0 is 'A', 1 is 'B', and so on
// note: characters read past the end of the bitset decode as 'A', because
// ReadInt64Field returns zero there
// note: if nbBits holds no full character (including negative values), the
// result is the empty string
//
// The receiver is a value, like HasBit, Length and ReadInt64Field, so the
// method can also be called on non-addressable Bits values; calls through a
// *Bits keep working.
func (b Bits) ReadStringField(offset, nbBits int) string {
	length := nbBits / characterNbBits
	if length <= 0 {
		// also protects make() against a negative size
		return ""
	}
	buf := make([]byte, length)
	for i := range buf {
		value := b.ReadInt64Field(offset+i*characterNbBits, characterNbBits)
		buf[i] = byte(value) + 'A'
	}
	return string(buf)
}

const (
	// boolNbBits is the width, in bits, of a bool field.
	boolNbBits = 1
)

// ReadBoolField reads a bool field of 1 bit starting at offset: it reports
// whether the bit at this offset is set.
//
// note: a negative offset or an offset past the end of the bitset reports
// false, because ReadInt64Field returns zero in both cases
//
// The receiver is a value, like HasBit, Length and ReadInt64Field, so the
// method can also be called on non-addressable Bits values; calls through a
// *Bits keep working.
func (b Bits) ReadBoolField(offset int) bool {
	return b.ReadInt64Field(offset, boolNbBits) == 1
}

// ToBitString returns the bitset as a string of bits ( human readable 0s and 1s )
//
// Every byte is rendered as 8 binary digits and consecutive bytes are
// separated by a single space, for example "00000001 00000101".
// A nil or empty bitset yields the empty string.
func (b Bits) ToBitString() string {
	if len(b) == 0 {
		return ""
	}

	// 8 digits per byte plus one separator between two consecutive bytes:
	// a single pre-sized buffer avoids the quadratic cost of repeated string
	// concatenation.
	const digitsPerByte = 8
	buf := make([]byte, 0, len(b)*(digitsPerByte+1)-1)

	for i, octet := range b {
		if i != 0 {
			buf = append(buf, ' ')
		}
		buf = append(buf, fmt.Sprintf("%08b", octet)...)
	}

	return string(buf)
}

// //////////////////////////////////////////////////
// bit string helper

// BitStringToBits converts a bit string ( human readable 0s and 1s ) to a Bits
// value; the parsing itself is done by BitStringToBytes.
func BitStringToBits(value string) Bits {
	raw := BitStringToBytes(value)
	return Bits(raw)
}

// BitStringToBytes converts a bit string ( human readable 0s and 1s ) to a
// byte slice.
//
// note: spaces are skipped, '1' sets a bit and any other character counts as
// an unset bit
// note: bits are packed most significant bit first; when the number of bits is
// not a multiple of 8, the last byte is left aligned and padded with zeros
func BitStringToBytes(value string) []byte {
	out := make([]byte, 0, len(value)/nbBitInByte)

	var (
		current byte // byte being assembled
		filled  int  // number of bits already stored in current
	)
	for _, c := range []byte(value) {
		switch c {
		case ' ':
			continue
		case '1':
			current |= 1 << (lastBitIndex - filled)
		}
		filled++
		if filled == nbBitInByte {
			out = append(out, current)
			current, filled = 0, 0
		}
	}
	// flush the trailing, partially filled byte
	if filled > 0 {
		out = append(out, current)
	}
	return out
}
103 changes: 103 additions & 0 deletions bits_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package iabtcf

import (
"encoding/base64"
"fmt"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

// TestBits checks the round trip between bit strings, raw bytes and Bits.
//
// Every case is keyed by its input bit string: the input is parsed with
// BitStringToBits, compared with the bytes decoded from the equivalent base64
// value, rendered back with ToBitString and finally probed bit by bit with
// HasBit.
func TestBits(t *testing.T) {

	type TestCase struct {
		Base64        string
		WantBitString string
		WantHasBit    []int
	}

	cases := map[string]*TestCase{
		"101": {
			Base64:        "oA",
			WantBitString: "10100000",
			WantHasBit:    []int{1, 3},
		},
		"00000001": {
			Base64:        "AQ",
			WantBitString: "00000001",
			WantHasBit:    []int{8},
		},
		"00000101": {
			Base64:        "BQ",
			WantBitString: "00000101",
			WantHasBit:    []int{6, 8},
		},
		"10000101": {
			Base64:        "hQ",
			WantBitString: "10000101",
			WantHasBit:    []int{1, 6, 8},
		},
		"00000001 00000101": {
			Base64:        "AQU",
			WantBitString: "00000001 00000101",
			WantHasBit:    []int{8, 14, 16},
		},
		"00000001 101": {
			Base64:        "AaA",
			WantBitString: "00000001 10100000",
			WantHasBit:    []int{8, 9, 11},
		},
		"00000001 00000000": {
			Base64:        "AQA",
			WantBitString: "00000001 00000000",
			WantHasBit:    []int{8},
		},
		"00000001 00000000 1": {
			Base64:        "AQCA",
			WantBitString: "00000001 00000000 10000000",
			WantHasBit:    []int{8, 17},
		},
		"00000001 0000001": {
			Base64:        "AQI",
			WantBitString: "00000001 00000010",
			WantHasBit:    []int{8, 15},
		},
	}

	for input, testCase := range cases {
		t.Run(input, func(t *testing.T) {
			t.Helper()
			fmt.Printf("\n[test] ---------- %s ---------- \n", input)

			// reference bytes come from the base64 form of the same bits
			decoded, decodeErr := base64.RawURLEncoding.DecodeString(testCase.Base64)
			fmt.Printf("[test] base64: %s >>> bytes: %v \n", testCase.Base64, decoded)
			require.NoError(t, decodeErr, "unexpected base64 error")

			parsed := BitStringToBits(input)
			fmt.Printf("[test] bits: %s >>> bytes: %v \n", input, parsed)
			require.Equal(t, decoded, []byte(parsed))

			fmt.Printf("[test] Bits: %v \n", parsed)

			rendered := parsed.ToBitString()
			fmt.Printf("[test] bytes: %v >>> bits: %s \n", parsed, rendered)
			require.Equal(t, testCase.WantBitString, rendered)

			fmt.Printf("[test] WantHasBit: %v \n", testCase.WantHasBit)
			// set of the bit numbers expected to be reported by HasBit
			expected := make(map[int]bool, len(testCase.WantHasBit))
			for _, number := range testCase.WantHasBit {
				expected[number] = true
			}
			// only the bits present in the input are probed, not the padding
			nbBits := len(input) - strings.Count(input, " ")
			for number := 1; number <= nbBits; number++ {
				require.Equal(t, expected[number], parsed.HasBit(number))
			}
		})
	}
}
Loading

0 comments on commit 8852bd3

Please sign in to comment.