-
Notifications
You must be signed in to change notification settings - Fork 24
feat(sdk): refactor TDF architecture with streaming support and segment-based writing #2652
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
strantalis
wants to merge
26
commits into
opentdf:main
Choose a base branch
from
strantalis:dspx-1465/tdf-writer-spike
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
5110834
feat(sdk): refactor TDF architecture with streaming support and segme…
strantalis a42c8b9
cleanup and try to properly handle crc32 for out of order segments
strantalis a052bc1
fix streaming based unit tests
strantalis c46d4e0
fix system metadata assertion version by moving it back to sdk
strantalis 7032424
perf improvements and benchmark tests
strantalis 65237e5
save doc changes
strantalis 54e3cd8
more cleanup
strantalis ce6fe1e
remove sync.Pool for buffer reuse until we have actual performance data
strantalis ce37c1d
remove unnecessary crc32 update in write segment
strantalis cca4e9f
remove unnecessary copy of []byte and make sure to copy data in AddSegment
strantalis 06df8be
more tests
strantalis b171d78
fix lint issues
strantalis 4a4800a
more lint fixes
strantalis 23599f0
revert policy string -> []byte change
strantalis 2d637f3
lint fixes and pb fix
strantalis a70cc2d
fix b64
strantalis 842869b
return segmentResult and finalizeResult
strantalis 68d02f9
- allow selecting final segments
strantalis e368e17
fix: add sparse indices
strantalis c555e7e
fix lint issues
strantalis 43d84fd
benchmark
strantalis 027637b
writer changes
imdominicreed 68b8a0e
update manifest and patch context
imdominicreed 5d90f86
allow empty and non start 0 segments
imdominicreed 485201d
always use zip64
imdominicreed b0691cf
Merge pull request #6 from imdominicreed/fix/get-manifest
strantalis File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| package archive | ||
|
|
||
| // CRC32CombineIEEE combines two CRC-32 (IEEE) checksums as if the data were concatenated. | ||
| // crc1 is the CRC of the first part, crc2 of the second part, and len2 is the byte length of the second part. | ||
| // This uses the standard reflected IEEE polynomial 0xEDB88320 as used by ZIP. | ||
| func CRC32CombineIEEE(crc1, crc2 uint32, len2 int64) uint32 { | ||
| if len2 <= 0 { | ||
| return crc1 | ||
| } | ||
|
|
||
| var even [32]uint32 | ||
| var odd [32]uint32 | ||
|
|
||
| // Operator for one zero bit in 'odd' | ||
| odd[0] = 0xEDB88320 // reflected IEEE polynomial | ||
| row := uint32(1) | ||
| for n := 1; n < 32; n++ { | ||
| odd[n] = row | ||
| row <<= 1 | ||
| } | ||
|
|
||
| // even = odd^(2), odd = even^(2) | ||
| gf2MatrixSquare(even[:], odd[:]) | ||
| gf2MatrixSquare(odd[:], even[:]) | ||
|
|
||
| // Apply len2 zero bytes to crc1 | ||
| for { | ||
| gf2MatrixSquare(even[:], odd[:]) | ||
| if (len2 & 1) != 0 { | ||
| crc1 = gf2MatrixTimes(even[:], crc1) | ||
| } | ||
| len2 >>= 1 | ||
| if len2 == 0 { | ||
| break | ||
| } | ||
| gf2MatrixSquare(odd[:], even[:]) | ||
| if (len2 & 1) != 0 { | ||
| crc1 = gf2MatrixTimes(odd[:], crc1) | ||
| } | ||
| len2 >>= 1 | ||
| if len2 == 0 { | ||
| break | ||
| } | ||
| } | ||
|
|
||
| return crc1 ^ crc2 | ||
| } | ||
|
|
||
// gf2MatrixTimes multiplies mat by vec over GF(2): it XORs together mat[i] for
// every bit position i that is set in vec and returns the result.
// mat must have an entry for the highest set bit of vec, otherwise indexing
// panics.
func gf2MatrixTimes(mat []uint32, vec uint32) uint32 {
	var acc uint32
	for col := 0; vec != 0; col, vec = col+1, vec>>1 {
		if vec&1 == 1 {
			acc ^= mat[col]
		}
	}
	return acc
}
|
|
||
| func gf2MatrixSquare(square, mat []uint32) { | ||
| for n := 0; n < 32; n++ { | ||
| square[n] = gf2MatrixTimes(mat, mat[n]) | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| package archive | ||
|
|
||
| import ( | ||
| "hash/crc32" | ||
| "math/rand" | ||
| "testing" | ||
| "time" | ||
| ) | ||
|
|
||
| func TestCRC32CombineIEEE_Basic(t *testing.T) { | ||
| rand.Seed(time.Now().UnixNano()) | ||
| a := make([]byte, 1024) | ||
| b := make([]byte, 2048) | ||
| rand.Read(a) | ||
| rand.Read(b) | ||
|
|
||
| crcA := crc32.ChecksumIEEE(a) | ||
| crcB := crc32.ChecksumIEEE(b) | ||
| combined := CRC32CombineIEEE(crcA, crcB, int64(len(b))) | ||
|
|
||
| all := append(append([]byte{}, a...), b...) | ||
| want := crc32.ChecksumIEEE(all) | ||
|
|
||
| if combined != want { | ||
| t.Fatalf("combined CRC mismatch: got %08x want %08x", combined, want) | ||
| } | ||
| } | ||
|
|
||
| func TestCRC32CombineIEEE_MultiChunks(t *testing.T) { | ||
| rand.Seed(42) | ||
| chunks := make([][]byte, 10) | ||
| for i := range chunks { | ||
| n := 1 + rand.Intn(8192) | ||
| chunks[i] = make([]byte, n) | ||
| rand.Read(chunks[i]) | ||
| } | ||
|
|
||
| // Combine sequentially | ||
| var total uint32 | ||
| var init bool | ||
| for _, c := range chunks { | ||
| crc := crc32.ChecksumIEEE(c) | ||
| if !init { | ||
| total = crc | ||
| init = true | ||
| } else { | ||
| total = CRC32CombineIEEE(total, crc, int64(len(c))) | ||
| } | ||
| } | ||
|
|
||
| // Compute directly over concatenation | ||
| var all []byte | ||
| for _, c := range chunks { | ||
| all = append(all, c...) | ||
| } | ||
| want := crc32.ChecksumIEEE(all) | ||
|
|
||
| if total != want { | ||
| t.Fatalf("multi-chunk combined CRC mismatch: got %08x want %08x", total, want) | ||
| } | ||
| } | ||
|
|
||
| func TestCRC32CombineIEEE_Associativity(t *testing.T) { | ||
| a := []byte("alpha") | ||
| b := []byte("beta") | ||
| c := []byte("charlie") | ||
|
|
||
| ca := crc32.ChecksumIEEE(a) | ||
| cb := crc32.ChecksumIEEE(b) | ||
| cc := crc32.ChecksumIEEE(c) | ||
|
|
||
| left := CRC32CombineIEEE(ca, CRC32CombineIEEE(cb, cc, int64(len(c))), int64(len(b)+len(c))) | ||
| right := CRC32CombineIEEE(CRC32CombineIEEE(ca, cb, int64(len(b))), cc, int64(len(c))) | ||
|
|
||
| if left != right { | ||
| t.Fatalf("associativity failed: left %08x right %08x", left, right) | ||
| } | ||
| } | ||
|
|
||
| func TestCRC32CombineIEEE_ZeroLength(t *testing.T) { | ||
| a := []byte("data") | ||
| ca := crc32.ChecksumIEEE(a) | ||
| // Combining with zero-length second part should be identity | ||
| got := CRC32CombineIEEE(ca, 0, 0) | ||
| if got != ca { | ||
| t.Fatalf("zero-length combine mismatch: got %08x want %08x", got, ca) | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.