Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
5110834
feat(sdk): refactor TDF architecture with streaming support and segme…
strantalis Aug 25, 2025
a42c8b9
cleanup and try to properly handle crc32 for out of order segments
strantalis Aug 27, 2025
a052bc1
fix streaming based unit tests
strantalis Aug 27, 2025
c46d4e0
fix system metadata assertion version by moving it back to sdk
strantalis Aug 27, 2025
7032424
perf improvements and benchmark tests
strantalis Aug 28, 2025
65237e5
save doc changes
strantalis Aug 28, 2025
54e3cd8
more cleanup
strantalis Aug 29, 2025
ce6fe1e
remove sync.Pool for buffer reuse until we have actual performance data
strantalis Aug 29, 2025
ce37c1d
remove unnecessary crc32 update in write segment
strantalis Aug 29, 2025
cca4e9f
remove unnecessary copy of []byte and make sure to copy data in AddSegment
strantalis Aug 29, 2025
06df8be
more tests
strantalis Sep 2, 2025
b171d78
fix lint issues
strantalis Sep 3, 2025
4a4800a
more lint fixes
strantalis Sep 3, 2025
23599f0
revert policy string -> []byte change
strantalis Sep 3, 2025
2d637f3
lint fixes and pb fix
strantalis Sep 3, 2025
a70cc2d
fix b64
strantalis Sep 4, 2025
842869b
return segmentResult and finalizeResult
strantalis Sep 4, 2025
68d02f9
- allow selecting final segments
strantalis Sep 11, 2025
e368e17
fix: add sparse indices
strantalis Sep 16, 2025
c555e7e
fix lint issues
strantalis Sep 16, 2025
43d84fd
benchmark
strantalis Sep 16, 2025
027637b
writer changes
imdominicreed Sep 17, 2025
68b8a0e
update manifest and patch context
imdominicreed Sep 17, 2025
5d90f86
allow empty and non start 0 segments
imdominicreed Sep 17, 2025
485201d
always use zip64
imdominicreed Sep 17, 2025
b0691cf
Merge pull request #6 from imdominicreed/fix/get-manifest
strantalis Sep 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
420 changes: 420 additions & 0 deletions sdk/internal/archive/benchmark_test.go

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions sdk/internal/archive/crc32combine.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package archive

// CRC32CombineIEEE merges two CRC-32 (IEEE) checksums into the checksum of the
// concatenated data, without needing the data itself.
//
// crc1 is the checksum of the leading bytes, crc2 the checksum of the trailing
// bytes, and len2 the number of trailing bytes. A len2 of zero or less returns
// crc1 unchanged. The arithmetic uses the reflected IEEE polynomial 0xEDB88320,
// the same one ZIP uses.
func CRC32CombineIEEE(crc1, crc2 uint32, len2 int64) uint32 {
	if len2 <= 0 {
		return crc1
	}

	const dim = 32
	var evenOp, oddOp [dim]uint32

	// Seed oddOp with the operator that advances a CRC by a single zero bit:
	// row 0 is the polynomial, the remaining rows are a one-position shift.
	oddOp[0] = 0xEDB88320
	for bit := 1; bit < dim; bit++ {
		oddOp[bit] = uint32(1) << (bit - 1)
	}

	// Each squaring doubles the number of zero bits the operator represents:
	// evenOp covers two bits, then oddOp covers four.
	gf2MatrixSquare(evenOp[:], oddOp[:])
	gf2MatrixSquare(oddOp[:], evenOp[:])

	// Walk the bits of len2 from least to most significant. The first squaring
	// yields the one-zero-byte operator; the two matrices then alternate roles
	// so each squaring reads the previous result.
	dst, src := evenOp[:], oddOp[:]
	for remaining := len2; remaining != 0; remaining >>= 1 {
		gf2MatrixSquare(dst, src)
		if remaining&1 == 1 {
			crc1 = gf2MatrixTimes(dst, crc1)
		}
		dst, src = src, dst
	}

	return crc1 ^ crc2
}

// gf2MatrixTimes multiplies the GF(2) matrix mat by the bit vector vec.
// For every set bit i of vec, entry mat[i] is XORed into the result, so mat
// must have an entry for the highest set bit of vec.
func gf2MatrixTimes(mat []uint32, vec uint32) uint32 {
	var acc uint32
	for row := 0; vec != 0; row, vec = row+1, vec>>1 {
		if vec&1 == 1 {
			acc ^= mat[row]
		}
	}
	return acc
}

// gf2MatrixSquare stores the GF(2) square of mat into square.
// Entry n of the result is mat multiplied by mat's own entry n. Both slices
// must hold at least 32 entries.
func gf2MatrixSquare(square, mat []uint32) {
	const dim = 32
	for col := 0; col < dim; col++ {
		column := mat[col]
		square[col] = gf2MatrixTimes(mat, column)
	}
}
88 changes: 88 additions & 0 deletions sdk/internal/archive/crc32combine_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package archive

import (
"hash/crc32"
"math/rand"
"testing"
"time"
)

// TestCRC32CombineIEEE_Basic verifies that combining the CRCs of two random
// buffers yields the CRC of their concatenation.
func TestCRC32CombineIEEE_Basic(t *testing.T) {
	// Use a private generator instead of the deprecated global rand.Seed /
	// rand.Read, and log the seed so a failing run can be reproduced.
	seed := time.Now().UnixNano()
	t.Logf("random seed: %d", seed)
	rng := rand.New(rand.NewSource(seed))

	a := make([]byte, 1024)
	b := make([]byte, 2048)
	if _, err := rng.Read(a); err != nil {
		t.Fatalf("generating first buffer: %v", err)
	}
	if _, err := rng.Read(b); err != nil {
		t.Fatalf("generating second buffer: %v", err)
	}

	crcA := crc32.ChecksumIEEE(a)
	crcB := crc32.ChecksumIEEE(b)
	combined := CRC32CombineIEEE(crcA, crcB, int64(len(b)))

	// Reference value: checksum computed directly over a followed by b.
	all := make([]byte, 0, len(a)+len(b))
	all = append(all, a...)
	all = append(all, b...)
	want := crc32.ChecksumIEEE(all)

	if combined != want {
		t.Fatalf("combined CRC mismatch: got %08x want %08x", combined, want)
	}
}

// TestCRC32CombineIEEE_MultiChunks verifies that folding the CRCs of ten
// pseudo-random chunks together, left to right, matches the CRC computed
// directly over the concatenated chunks.
func TestCRC32CombineIEEE_MultiChunks(t *testing.T) {
	// A locally seeded generator keeps the input deterministic. The global
	// rand.Seed is deprecated and has no effect from Go 1.24 onward.
	rng := rand.New(rand.NewSource(42))

	chunks := make([][]byte, 10)
	totalLen := 0
	for i := range chunks {
		n := 1 + rng.Intn(8192)
		chunks[i] = make([]byte, n)
		if _, err := rng.Read(chunks[i]); err != nil {
			t.Fatalf("generating chunk %d: %v", i, err)
		}
		totalLen += n
	}

	// Combine sequentially: the first chunk's CRC seeds the running total and
	// every later chunk is folded in with its own length.
	total := crc32.ChecksumIEEE(chunks[0])
	for _, c := range chunks[1:] {
		total = CRC32CombineIEEE(total, crc32.ChecksumIEEE(c), int64(len(c)))
	}

	// Compute directly over the concatenation.
	all := make([]byte, 0, totalLen)
	for _, c := range chunks {
		all = append(all, c...)
	}
	want := crc32.ChecksumIEEE(all)

	if total != want {
		t.Fatalf("multi-chunk combined CRC mismatch: got %08x want %08x", total, want)
	}
}

// TestCRC32CombineIEEE_Associativity verifies that the grouping of combine
// operations does not matter: a+(b+c) and (a+b)+c produce the same checksum.
func TestCRC32CombineIEEE_Associativity(t *testing.T) {
	parts := [3][]byte{[]byte("alpha"), []byte("beta"), []byte("charlie")}

	var sums [3]uint32
	var sizes [3]int64
	for i, p := range parts {
		sums[i] = crc32.ChecksumIEEE(p)
		sizes[i] = int64(len(p))
	}

	// a + (b + c): the tail is combined first and spans both b and c.
	tail := CRC32CombineIEEE(sums[1], sums[2], sizes[2])
	left := CRC32CombineIEEE(sums[0], tail, sizes[1]+sizes[2])

	// (a + b) + c: the head is combined first, then c is appended.
	head := CRC32CombineIEEE(sums[0], sums[1], sizes[1])
	right := CRC32CombineIEEE(head, sums[2], sizes[2])

	if left != right {
		t.Fatalf("associativity failed: left %08x right %08x", left, right)
	}
}

// TestCRC32CombineIEEE_ZeroLength verifies that appending an empty second part
// (crc2 = 0, len2 = 0) leaves the first checksum untouched.
func TestCRC32CombineIEEE_ZeroLength(t *testing.T) {
	want := crc32.ChecksumIEEE([]byte("data"))
	if got := CRC32CombineIEEE(want, 0, 0); got != want {
		t.Fatalf("zero-length combine mismatch: got %08x want %08x", got, want)
	}
}
102 changes: 102 additions & 0 deletions sdk/internal/archive/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,108 @@ import (
"testing"
)

// Byte-size constants used to describe test archive entries.
const (
	oneKB     = 1 << 10       // 1,024 bytes
	tenKB     = oneKB * 10    // 10,240 bytes
	oneMB     = oneKB * oneKB // 1,048,576 bytes
	hundredMB = oneMB * 100   // 104,857,600 bytes
	// NOTE(review): despite the name this is 1000 MiB (1,048,576,000 bytes),
	// not 1024 MiB. The value is left as-is because expectations elsewhere may
	// depend on it.
	oneGB = hundredMB * 10
	tenGB = oneGB * 10 // 10,485,760,000 bytes
)

// ZipEntryInfo describes a single file entry of an archive test case.
// Field order is significant: the ArchiveTests table constructs values with
// positional literals (filename first, then size).
type ZipEntryInfo struct {
// filename is the entry's name inside the archive, e.g. "1.txt".
filename string
// size is the entry's size; NOTE(review): presumably the payload length in
// bytes — confirm against the tests that consume this table.
size int64
}

// ArchiveTests is the shared table of archive fixtures: each case lists the
// entries to place in an archive together with an archiveSize value.
// NOTE(review): archiveSize is presumably the expected total size in bytes of
// the resulting archive — confirm against the tests that consume this table.
var ArchiveTests = []struct { //nolint:gochecknoglobals // This global is used as test harness for other tests
	files       []ZipEntryInfo
	archiveSize int64
}{
	// Three tiny entries.
	{
		files: []ZipEntryInfo{
			{filename: "1.txt", size: 10},
			{filename: "2.txt", size: 10},
			{filename: "3.txt", size: 10},
		},
		archiveSize: 358,
	},
	// Six entries of one KB each.
	{
		files: []ZipEntryInfo{
			{filename: "1.txt", size: oneKB},
			{filename: "2.txt", size: oneKB},
			{filename: "3.txt", size: oneKB},
			{filename: "4.txt", size: oneKB},
			{filename: "5.txt", size: oneKB},
			{filename: "6.txt", size: oneKB},
		},
		archiveSize: 6778,
	},
	// Large entries, including two with sizes that are not round multiples.
	{
		files: []ZipEntryInfo{
			{filename: "1.txt", size: hundredMB},
			{filename: "2.txt", size: hundredMB},
			{filename: "3.txt", size: hundredMB},
			{filename: "4.txt", size: hundredMB},
			{filename: "5.txt", size: hundredMB + oneMB + tenKB},
			// The name really is ".txt"; it is kept as-is because the
			// recorded archiveSize goes with these exact names.
			{filename: ".txt", size: oneMB + oneKB},
		},
		archiveSize: 526397048,
	},
}

// stepSize is the chunk length (2 MB) of the shared write buffer.
var stepSize = int64(2 * oneMB) //nolint:gochecknoglobals // This global is used in other tests

// writeBuffer is a reusable stepSize-byte buffer used as the source of file
// contents when writing test entries.
// NOTE(review): the earlier comment said the buffer is filled with 0xFF; no
// such fill is visible here (make zero-initializes), so confirm where it is
// populated.
var writeBuffer = make([]byte, stepSize) //nolint:gochecknoglobals // This is used as reuse buffer

func TestCreateArchiveReader(t *testing.T) { // use native library("archive/zip") to create zip files
nativeZipFiles(t)

Expand Down
Loading