Skip to content
Draft

3.0.0 #137

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
362 commits
Select commit Hold shift + click to select a range
520a2a7
data block forward reader
marvin-j97 May 5, 2025
71f42a8
drop guardian
marvin-j97 May 5, 2025
b8698fe
refactor
marvin-j97 May 5, 2025
e596175
doc
marvin-j97 May 5, 2025
b6899da
rename structs
marvin-j97 May 5, 2025
97a64e9
fix
marvin-j97 May 5, 2025
a87060b
add indirection for trailer->block handles
marvin-j97 May 5, 2025
fb890fa
refactor: bloom filter
marvin-j97 May 10, 2025
f29d1b7
feat: (un)pinned filter logic
marvin-j97 May 10, 2025
c28767f
move tree version
marvin-j97 May 10, 2025
bcffea2
segment inner pinned filter block
marvin-j97 May 10, 2025
b29b000
wip
marvin-j97 May 10, 2025
85d8553
disable some tests temporarily
marvin-j97 May 10, 2025
66d20df
misc
marvin-j97 May 10, 2025
2d05696
wip
marvin-j97 May 10, 2025
89f93fa
in segment scanner, use data block forward reader
marvin-j97 May 10, 2025
45dda56
skip buffer zeroing in block load
marvin-j97 May 10, 2025
3ad02da
refactor: remove superfluous parameter
marvin-j97 May 10, 2025
fa0c52d
fix
marvin-j97 May 10, 2025
0a3cf38
refactor
marvin-j97 May 10, 2025
dd0e508
fix: data block snapshot read
marvin-j97 May 23, 2025
508f919
rename TreeVersion
marvin-j97 May 23, 2025
b4e3adc
doc: fix complexity of operations
marvin-j97 May 23, 2025
ad46c7f
add derives to Timestamp
marvin-j97 May 23, 2025
80a8cb3
wip
marvin-j97 May 23, 2025
9be5bf0
refactor: checksum
marvin-j97 May 23, 2025
b7672df
refactor: block
marvin-j97 May 23, 2025
2e6fd99
add simple tree reload test
marvin-j97 May 26, 2025
eebab0b
add key range partitions structs
marvin-j97 May 26, 2025
ee7e477
add version & run structs
marvin-j97 May 26, 2025
29fbd0c
change level_reader and _scanner to use run struct
marvin-j97 May 26, 2025
56fd4fb
update level manifest with new version & run structs
marvin-j97 May 26, 2025
ce3d6f9
update Tree to use new levels structs
marvin-j97 May 26, 2025
628a08c
update multi reader
marvin-j97 May 26, 2025
72f1961
update leveled compaction
marvin-j97 May 26, 2025
502fedf
refactor: data block item count skip
marvin-j97 May 26, 2025
f38c9da
prepare range
marvin-j97 May 26, 2025
2578599
update module names in lib.rs
marvin-j97 May 26, 2025
0311dab
remove unused string constant
marvin-j97 May 26, 2025
2a0323e
update compaction worker to use new levels structs
marvin-j97 May 26, 2025
f9a9bc1
fix test
marvin-j97 May 27, 2025
7368f3b
restore segment metadata.tombstone_count
marvin-j97 May 27, 2025
aef9aeb
update test
marvin-j97 May 27, 2025
492b4d0
fix
marvin-j97 May 27, 2025
89a0b82
wip
marvin-j97 May 27, 2025
fe2145f
fix test
marvin-j97 May 27, 2025
2f2d407
fix test
marvin-j97 May 27, 2025
0a136c5
fix test
marvin-j97 May 27, 2025
e1a5576
add fallible clipping iter
marvin-j97 May 30, 2025
9fed8cf
Update mod.rs
marvin-j97 Jun 3, 2025
f791ca6
Update offset.rs
marvin-j97 Jun 3, 2025
083d818
Update trailer.rs
marvin-j97 Jun 3, 2025
6f0f5ed
Update trailer.rs
marvin-j97 Jun 3, 2025
a4393c7
use vlog git dep
marvin-j97 Jun 3, 2025
5c8d1ed
add conversion method for InternalKey
marvin-j97 Jun 4, 2025
94e82ed
change segment path type
marvin-j97 Jun 4, 2025
6aeb578
refactor
marvin-j97 Jun 4, 2025
03ac00e
adjust test
marvin-j97 Jun 4, 2025
f5d3301
test: temp disable some tests
marvin-j97 Jun 4, 2025
5a820d6
add log
marvin-j97 Jun 4, 2025
262ba91
refactor: change type signature
marvin-j97 Jun 4, 2025
d6e7e6f
change segment print helper
marvin-j97 Jun 4, 2025
cb01fae
add load_block helper fn
marvin-j97 Jun 4, 2025
2d4c773
refactor simplified compare_prefixed_slice
marvin-j97 Jun 4, 2025
7977171
add license header
marvin-j97 Jun 4, 2025
1d7a257
temp disable another test
marvin-j97 Jun 4, 2025
a7b195c
fix: key range partitions
marvin-j97 Jun 4, 2025
2eef118
naming
marvin-j97 Jun 4, 2025
661b067
refactor: data block binary search
marvin-j97 Jun 4, 2025
030b99d
expose full block index' index block
marvin-j97 Jun 4, 2025
3cdabe2
restore run reader
marvin-j97 Jun 4, 2025
8cf696a
index block iterator
marvin-j97 Jun 4, 2025
9cb6e34
full block index iterator
marvin-j97 Jun 4, 2025
c07d248
optimize L0 after flush
marvin-j97 Jun 4, 2025
3085640
segment iter WIP
marvin-j97 Jun 4, 2025
09e33c5
restore tree range
marvin-j97 Jun 4, 2025
cb6efb5
adjust memtable point read
marvin-j97 Jun 4, 2025
98289ed
temporarily gut compaction strategies
marvin-j97 Jun 4, 2025
ed26c0a
adjust blob tree GC reader
marvin-j97 Jun 4, 2025
36d1f89
change assertion message
marvin-j97 Jun 4, 2025
06c04ea
fmt & fix
marvin-j97 Jun 4, 2025
6b883d5
clippy
marvin-j97 Jun 4, 2025
0b0cdea
clippy
marvin-j97 Jun 4, 2025
6cc7a77
refactor
marvin-j97 Jun 4, 2025
4e84ba1
fix
marvin-j97 Jun 4, 2025
afc149b
increase msrv idc
marvin-j97 Jun 4, 2025
c608676
bump
marvin-j97 Jun 4, 2025
309ef3d
temp disable bytes feature flag
marvin-j97 Jun 5, 2025
349964e
fix: kv example
marvin-j97 Jun 5, 2025
91eaa95
fork double-ended-peekable
marvin-j97 Jun 26, 2025
159c0c1
remove double-ended-peekable
marvin-j97 Jun 26, 2025
d3e48f8
refactor
marvin-j97 Jun 26, 2025
e793984
block decoder
marvin-j97 Jun 26, 2025
eefcd78
unsafe feature flag in hash index
marvin-j97 Jun 26, 2025
88104d2
use new double ended peekable
marvin-j97 Jun 26, 2025
1243783
crate-level unwrap macro
marvin-j97 Jun 26, 2025
2c0cdee
refactor: block encoder
marvin-j97 Jun 26, 2025
761e39f
refactor: data block iter
marvin-j97 Jun 26, 2025
3ab995a
refactor: data block
marvin-j97 Jun 26, 2025
5a7e44d
refactor: index block iter
marvin-j97 Jun 26, 2025
5a08d9b
refactor: index block
marvin-j97 Jun 26, 2025
a6100e2
adjust block index
marvin-j97 Jun 26, 2025
ae73138
adjust segment iter
marvin-j97 Jun 26, 2025
9611819
refactor: segment scanner
marvin-j97 Jun 26, 2025
ed5d05b
SliceIndexes struct
marvin-j97 Jun 26, 2025
e03248c
seek index in segment range read ops
marvin-j97 Jun 26, 2025
ff36871
wip
marvin-j97 Jun 26, 2025
36d4976
fix: clippy
marvin-j97 Jun 26, 2025
c0db138
fix: clippy
marvin-j97 Jun 26, 2025
397df74
Merge pull request #134 from fjall-rs/v3/level-manifest-versioning
marvin-j97 Jun 26, 2025
55f56cf
Merge remote-tracking branch 'origin/main' into 3.0.0
marvin-j97 Jun 26, 2025
73f4c5c
remove some logging
marvin-j97 Jun 26, 2025
ab9baf7
refactor
marvin-j97 Jun 26, 2025
a26c8cd
wip
marvin-j97 Jun 27, 2025
ec223a5
refactor
marvin-j97 Jun 27, 2025
72ad26a
refactor
marvin-j97 Jun 27, 2025
f7e09d9
disable monkey temporarily
marvin-j97 Jun 29, 2025
6d09a66
perf: correctly seek index on range read
marvin-j97 Jun 29, 2025
f01057e
bump msrv to 1.82
marvin-j97 Jul 1, 2025
6428135
wip
marvin-j97 Jul 1, 2025
363e647
doc
marvin-j97 Jul 1, 2025
edf9a90
perf: optimize aggregate_key_range
marvin-j97 Jul 14, 2025
0fe25be
rename macro
marvin-j97 Jul 14, 2025
bc8dc6d
update import
marvin-j97 Jul 14, 2025
508eb21
wip
marvin-j97 Jul 14, 2025
6d21314
remove unnecessary assertions
marvin-j97 Jul 14, 2025
1178fc5
add debug asserts
marvin-j97 Jul 14, 2025
60bb3a1
comment out debug logs
marvin-j97 Jul 14, 2025
1b34572
block decoder docs
marvin-j97 Jul 14, 2025
ac64c9c
wip
marvin-j97 Jul 14, 2025
c220b34
wip
marvin-j97 Jul 14, 2025
cb04513
disallow index blocks with restart interval > 1 for now
marvin-j97 Jul 14, 2025
474e7f5
wip
marvin-j97 Jul 14, 2025
4a41c9c
add more metadata props
marvin-j97 Jul 14, 2025
802fc55
wip
marvin-j97 Jul 14, 2025
1733447
perf: correctly seek segment iter
marvin-j97 Jul 14, 2025
27d7ece
update ingestion
marvin-j97 Jul 14, 2025
ac9378a
cap leveled compaction to 100 segments for now
marvin-j97 Jul 14, 2025
88a1958
reimplement movedown "compaction" strategy
marvin-j97 Jul 14, 2025
e900848
add metrics struct
marvin-j97 Jul 14, 2025
6b58416
add pinned index blocks stat
marvin-j97 Jul 14, 2025
6d20df2
add block IO and bloom filter metrics
marvin-j97 Jul 14, 2025
ffb8ed8
Merge remote-tracking branch 'origin/main' into 3.0.0
marvin-j97 Jul 14, 2025
b43673d
fix: segment range edge case
marvin-j97 Jul 14, 2025
fdc1077
add feature flags
marvin-j97 Jul 14, 2025
8642e31
add debug derive
marvin-j97 Jul 15, 2025
5f37a93
fix block iter
marvin-j97 Jul 15, 2025
80b7d52
restore hash index
marvin-j97 Jul 15, 2025
3c58f6b
add some test cases
marvin-j97 Jul 15, 2025
ffd5078
fix: doctest
marvin-j97 Jul 15, 2025
4dc0618
feat: add per-block read checksum check
marvin-j97 Jul 19, 2025
ded3456
wip
marvin-j97 Jul 20, 2025
89e7b71
allow block encoder to write into external buffer
marvin-j97 Jul 20, 2025
9328d25
print block handle in checksum
marvin-j97 Jul 20, 2025
c0e4abb
data block encoder allow buffer reuse
marvin-j97 Jul 20, 2025
b51412c
index block encode allow buffer reuse
marvin-j97 Jul 20, 2025
9cefbb2
rename
marvin-j97 Jul 20, 2025
955ddb5
reuse block encode buffer in segment construction
marvin-j97 Jul 20, 2025
8d18063
version iter double ended
marvin-j97 Jul 22, 2025
f2c9a52
Update mod.rs
marvin-j97 Jul 23, 2025
b706f55
wip
marvin-j97 Jul 23, 2025
23d3964
128-bit checksum, block type in block header
marvin-j97 Jul 23, 2025
d61e65f
Merge remote-tracking branch 'origin/3.0.0' into 3.0.0
marvin-j97 Jul 23, 2025
a3c34d5
Merge branch 'main' into 3.0.0
marvin-j97 Jul 23, 2025
807d885
fmt
marvin-j97 Jul 23, 2025
f9f6364
license
marvin-j97 Jul 27, 2025
3970622
test: block header serde roundtrip
marvin-j97 Jul 27, 2025
c168872
fix: optimize_runs
marvin-j97 Jul 27, 2025
a1f41dc
gitignore
marvin-j97 Jul 27, 2025
88b6b4b
Merge branch '3.0.0' into feat/blocked-bloom
marvin-j97 Jul 27, 2025
0752338
wip: bloom
marvin-j97 Jul 27, 2025
d376265
wip
marvin-j97 Jul 27, 2025
5211d73
Merge branch '3.0.0' into feat/blocked-bloom
marvin-j97 Jul 27, 2025
0a7b827
bloom filters adjustments
marvin-j97 Jul 30, 2025
16e7877
wip
marvin-j97 Jul 30, 2025
5f5394a
microbench: bloom speed
marvin-j97 Jul 31, 2025
f16167f
use only 1 hash in bloom construction
marvin-j97 Aug 1, 2025
acf7cc3
perf: zero seqnos if below GC watermark
marvin-j97 Aug 8, 2025
ea573b0
fix: lint
marvin-j97 Aug 8, 2025
fddc524
doc
marvin-j97 Aug 9, 2025
e829f5d
disable monkey temporarily
marvin-j97 Aug 9, 2025
4d1798f
perf: replace busy_levels with level_is_busy
marvin-j97 Aug 9, 2025
d09b8fa
wip
marvin-j97 Aug 9, 2025
4e96ed1
fix
marvin-j97 Aug 9, 2025
2367d5a
reimplement fifo compaction
marvin-j97 Aug 9, 2025
ef97192
change compaction names
marvin-j97 Aug 9, 2025
1c93dfb
leveled compaction scoring
marvin-j97 Aug 9, 2025
61aa691
fix: major compaction test
marvin-j97 Aug 9, 2025
d0f5077
refactor: add crate-level hash fns
marvin-j97 Aug 12, 2025
4c99552
update msrv
marvin-j97 Aug 12, 2025
0399e29
use new crate-level hash fns
marvin-j97 Aug 13, 2025
d469dbb
fix: FIFO compaction
marvin-j97 Aug 13, 2025
5a1afea
wip
marvin-j97 Aug 13, 2025
266e3a9
add hash index config support to segment writer
marvin-j97 Aug 13, 2025
d649a2b
wip
marvin-j97 Aug 13, 2025
6f9dfdb
also configure segment writer in compactions correctly
marvin-j97 Aug 13, 2025
a9c88a8
add version GC
marvin-j97 Aug 13, 2025
2a062ef
handle point read linear scan more nicely
marvin-j97 Aug 13, 2025
3302288
wip
marvin-j97 Aug 13, 2025
4ba9bd8
add safety comment to binary index get
marvin-j97 Aug 13, 2025
b798572
doc
marvin-j97 Aug 13, 2025
7e1146a
remove url reference
marvin-j97 Aug 13, 2025
075c09b
gitignore
marvin-j97 Aug 13, 2025
dcd4d39
remove miniz
marvin-j97 Aug 13, 2025
6d986b1
update CompressionType serde
marvin-j97 Aug 13, 2025
9b6aa44
fix
marvin-j97 Aug 13, 2025
180209c
update deps
marvin-j97 Aug 13, 2025
7ee5518
wip
marvin-j97 Aug 13, 2025
b2d8207
visibility
marvin-j97 Aug 13, 2025
26e315d
line
marvin-j97 Aug 13, 2025
faecf9f
fix temporary
marvin-j97 Aug 13, 2025
6d512e7
wip
marvin-j97 Aug 14, 2025
6d6036a
refactor: rename file
marvin-j97 Aug 14, 2025
0a52bb6
refactor: remove old file
marvin-j97 Aug 14, 2025
ad3bcb1
refactor: clippy
marvin-j97 Aug 14, 2025
b50d7fa
refactor: clippy
marvin-j97 Aug 14, 2025
3859c0e
refactor: clippy
marvin-j97 Aug 14, 2025
ae8cada
refactor: clippy
marvin-j97 Aug 14, 2025
43d7561
refactor: clippy
marvin-j97 Aug 14, 2025
e9bef55
refactor: clippy
marvin-j97 Aug 14, 2025
0cca26a
hide unused module
marvin-j97 Aug 14, 2025
5be69ff
refactor: clippy
marvin-j97 Aug 14, 2025
45ac853
refactor: clippy
marvin-j97 Aug 14, 2025
d81abcf
wip
marvin-j97 Aug 14, 2025
70953fa
refactor: clippy
marvin-j97 Aug 14, 2025
1f61440
refactor: clippy
marvin-j97 Aug 14, 2025
a6d3149
perf: use binary search in run overlaps/contains
marvin-j97 Aug 18, 2025
014069d
wip
marvin-j97 Aug 18, 2025
47384e1
perf: leveled compaction lazy evaluation
marvin-j97 Aug 18, 2025
c274825
Merge remote-tracking branch 'origin/main' into 3.0.0
marvin-j97 Aug 18, 2025
2cd07bf
wip
marvin-j97 Aug 18, 2025
8795cb7
fix: lifetime
marvin-j97 Aug 18, 2025
78ae7ce
adjust bloom_speed bench
marvin-j97 Aug 24, 2025
dc444b3
microbench: hash fns
marvin-j97 Aug 24, 2025
e986eb4
microbench: fractional cascading in segment indexing
marvin-j97 Aug 24, 2025
7262200
microbench: bloom fpr
marvin-j97 Aug 24, 2025
2f1d057
microbench: block loading
marvin-j97 Aug 24, 2025
9551bb8
microbench: block hash index
marvin-j97 Aug 24, 2025
5923487
microbench: block binary index
marvin-j97 Aug 24, 2025
6a3df3f
remove old test case
marvin-j97 Aug 24, 2025
d86c351
doc
marvin-j97 Aug 24, 2025
de4cb4b
use File::create_new wherever possible
marvin-j97 Aug 24, 2025
3171c66
fix: 32-bit overflow in leveled compaction
marvin-j97 Aug 24, 2025
6bba3f4
add microbench script
marvin-j97 Aug 25, 2025
7ead943
feat: drop range, #148
marvin-j97 Aug 25, 2025
0e0f65b
clippy
marvin-j97 Aug 25, 2025
d7e0d81
refactor: seqno generator, information hiding
marvin-j97 Aug 25, 2025
de4b8b8
impl Segment::tombstone_count
marvin-j97 Aug 25, 2025
dff4401
fix major compaction docs
marvin-j97 Aug 25, 2025
7cdadf2
fix
marvin-j97 Aug 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
matrix:
rust_version:
- stable
- "1.76.0" # MSRV
- "1.89.0" # MSRV
os:
- ubuntu-latest
- windows-latest
Expand Down Expand Up @@ -79,4 +79,4 @@ jobs:
- name: cross test
run: |
cargo install cross
cross test -r --features lz4,miniz --target ${{ matrix.target }}
cross test -r --features lz4 --target ${{ matrix.target }}
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,9 @@ Cargo.lock
.bench

mutants*
profile.json
fuzz*/**/out*

microbench/**/data.jsonl
microbench/**/*.svg

45 changes: 19 additions & 26 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
name = "lsm-tree"
description = "A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs)"
license = "MIT OR Apache-2.0"
version = "2.10.3"
version = "3.0.0"
edition = "2021"
rust-version = "1.76.0"
rust-version = "1.82.0"
readme = "README.md"
include = ["src/**/*", "LICENSE-APACHE", "LICENSE-MIT", "README.md"]
repository = "https://github.com/fjall-rs/lsm-tree"
Expand All @@ -19,40 +19,33 @@ path = "src/lib.rs"
[features]
default = []
lz4 = ["dep:lz4_flex"]
miniz = ["dep:miniz_oxide"]
bytes = ["value-log/bytes"]
# bytes = ["value-log/bytes"] # TODO: restore
use_unsafe = []
bloom_use_unsafe = []
metrics = []

[dependencies]
byteorder = "1.5.0"
crossbeam-skiplist = "0.1.3"
double-ended-peekable = "0.1.0"
enum_dispatch = "0.3.13"
guardian = "1.1.0"
interval-heap = "0.0.5"
log = "0.4.22"
lz4_flex = { version = "=0.11.3", optional = true, default-features = false }
miniz_oxide = { version = "0.8.0", optional = true }
path-absolutize = "3.1.1"
quick_cache = { version = "0.6.5", default-features = false, features = [] }
rustc-hash = "2.0.0"
self_cell = "1.0.4"
tempfile = "3.12.0"
value-log = { version = "~1.9", default-features = false, features = [] }
log = "0.4.27"
lz4_flex = { version = "0.11.5", optional = true, default-features = false }
quick_cache = { version = "0.6.16", default-features = false, features = [] }
rustc-hash = "2.1.1"
self_cell = "1.2.0"
tempfile = "3.20.0"
value-log = { git = "https://github.com/fjall-rs/value-log", branch = "v2", default-features = false, features = [
] }
varint-rs = "2.2.0"
xxhash-rust = { version = "0.8.12", features = ["xxh3"] }
xxhash-rust = { version = "0.8.15", features = ["xxh3"] }

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
fs_extra = "1.3.0"
nanoid = "0.4.0"
rand = "0.9.0"
test-log = "0.2.16"

# half 2.5.0 has MSRV 1.81
half = "=2.4.0"

# rayon has MSRV 1.80
rayon-core = "=1.12.1"
rand = "0.9.2"
test-log = "0.2.18"

[package.metadata.cargo-all-features]
denylist = []
Expand Down Expand Up @@ -85,13 +78,13 @@ required-features = []
name = "block"
harness = false
path = "benches/block.rs"
required-features = ["lz4", "miniz"]
required-features = ["lz4"]

[[bench]]
name = "tree"
harness = false
path = "benches/tree.rs"
required-features = ["lz4", "miniz"]
required-features = ["lz4"]

[[bench]]
name = "level_manifest"
Expand Down
34 changes: 16 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![CI](https://github.com/fjall-rs/lsm-tree/actions/workflows/test.yml/badge.svg)](https://github.com/fjall-rs/lsm-tree/actions/workflows/test.yml)
[![docs.rs](https://img.shields.io/docsrs/lsm-tree?color=green)](https://docs.rs/lsm-tree)
[![Crates.io](https://img.shields.io/crates/v/lsm-tree?color=blue)](https://crates.io/crates/lsm-tree)
![MSRV](https://img.shields.io/badge/MSRV-1.76.0-blue)
![MSRV](https://img.shields.io/badge/MSRV-1.89.0-blue)
[![dependency status](https://deps.rs/repo/github/fjall-rs/lsm-tree/status.svg)](https://deps.rs/repo/github/fjall-rs/lsm-tree)

A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rust.
Expand All @@ -19,21 +19,23 @@ A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rus

This is the most feature-rich LSM-tree implementation in Rust! It features:

- Thread-safe BTreeMap-like API
- [99.9% safe](./UNSAFE.md) & stable Rust
- Block-based tables with compression support
- Thread-safe `BTreeMap`-like API
- Mostly [safe](./UNSAFE.md) & 100% stable Rust
- Block-based tables with compression support & prefix truncation
- Optional hash indexes in data blocks for faster point lookups [[3]](#footnotes)
- Per-level filter/index block pinning configuration
- Range & prefix searching with forward and reverse iteration
- Size-tiered, (concurrent) Leveled and FIFO compaction
- Multi-threaded flushing (immutable/sealed memtables)
- Partitioned block index to reduce memory footprint and keep startup time short [[1]](#footnotes)
- Block caching to keep hot data in memory
- Bloom filters to increase point lookup performance
- Snapshots (MVCC)
- *AMQ* filters (currently Bloom filters) to improve point lookup performance
- Snapshots (*MVCC*)
- Optionally partitioned block index & filters for better cache efficiency [[1]](#footnotes)
- Size-tiered, (concurrent) Leveled and FIFO compaction
- Multi-threaded flushing (immutable/sealed memtables)
- Key-value separation (optional) [[2]](#footnotes)
- Single deletion tombstones ("weak" deletion)

Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
engine, larger keys and values have a bigger performance impact.
Keys are limited to 65536 bytes, values are limited to 2^32 bytes.
As is normal with any kind of storage engine, larger keys and values have a bigger performance impact.

## Feature flags

Expand All @@ -43,12 +45,6 @@ Allows using `LZ4` compression, powered by [`lz4_flex`](https://github.com/PSeit

*Disabled by default.*

### miniz

Allows using `DEFLATE/zlib` compression, powered by [`miniz_oxide`](https://github.com/Frommi/miniz_oxide).

*Disabled by default.*

### bytes

Uses [`bytes`](https://github.com/tokio-rs/bytes) as the underlying `Slice` type.
Expand All @@ -66,7 +62,7 @@ Future breaking changes will result in a major version bump and a migration path
## Run unit benchmarks

```bash
cargo bench --features lz4,miniz
cargo bench --features lz4
```

## License
Expand All @@ -80,3 +76,5 @@ All contributions are to be licensed as MIT OR Apache-2.0.
[1] https://rocksdb.org/blog/2017/05/12/partitioned-index-filter.html

[2] https://github.com/facebook/rocksdb/wiki/BlobDB

[3] https://rocksdb.org/blog/2018/08/23/data-block-hash-index.html
16 changes: 14 additions & 2 deletions UNSAFE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Unsafe usage

Currently, the project itself only uses one **1** unsafe block (ignoring dependencies which are tested themselves separately):
...

- https://github.com/fjall-rs/lsm-tree/blob/2d8686e873369bd9c4ff2b562ed988c1cea38331/src/binary_search.rs#L23-L25
## Run fuzz testing

```bash
cargo +nightly fuzz run data_block -- -max_len=8000000
cargo +nightly fuzz run index_block -- -max_len=8000000
cargo +nightly fuzz run partition_point -- -max_len=1000000
```

## Run mutation testing

```bash
cargo-mutants mutants --test-tool=nextest
```
48 changes: 20 additions & 28 deletions benches/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ use criterion::{criterion_group, criterion_main, Criterion};
use lsm_tree::{
coding::Encode,
segment::{
block::{header::Header as BlockHeader, ItemSize},
block::{header::Header as BlockHeader, offset::BlockOffset, ItemSize},
meta::CompressionType,
value_block::{BlockOffset, ValueBlock},
value_block::ValueBlock,
},
Checksum, InternalValue,
};
use rand::Rng;
use std::io::Write;

/* fn value_block_size(c: &mut Criterion) {
Expand Down Expand Up @@ -50,19 +51,11 @@ fn value_block_find(c: &mut Criterion) {
for item_count in [10, 100, 1_000, 10_000] {
let mut items = vec![];

for seqno in (0..(item_count - 2)).rev() {
for item in 0u64..item_count {
items.push(InternalValue::from_components(
*b"a",
*b"a",
seqno,
lsm_tree::ValueType::Value,
));
}
for seqno in (0..2).rev() {
items.push(InternalValue::from_components(
*b"b",
*b"b",
seqno,
item.to_be_bytes(),
b"",
0,
lsm_tree::ValueType::Value,
));
}
Expand All @@ -78,22 +71,29 @@ fn value_block_find(c: &mut Criterion) {
},
};

let mut rng = rand::rng();

group.bench_function(format!("{item_count} items (linear)"), |b| {
b.iter(|| {
let needle = rng.random_range(0..item_count).to_be_bytes();

let item = block
.items
.iter()
.find(|item| &*item.key.user_key == b"b")
.find(|item| &*item.key.user_key == needle)
.cloned()
.unwrap();
assert_eq!(item.key.seqno, 1);

assert_eq!(item.key.user_key, needle);
})
});

group.bench_function(format!("{item_count} items (binary search)"), |b| {
b.iter(|| {
let item = block.get_latest(b"b").unwrap();
assert_eq!(item.key.seqno, 1);
let needle = rng.random_range(0..item_count).to_be_bytes();

let item = block.get_latest(&needle).unwrap();
assert_eq!(item.key.user_key, needle);
})
});
}
Expand All @@ -102,11 +102,7 @@ fn value_block_find(c: &mut Criterion) {
fn encode_block(c: &mut Criterion) {
let mut group = c.benchmark_group("Encode block");

for comp_type in [
CompressionType::None,
CompressionType::Lz4,
CompressionType::Miniz(3),
] {
for comp_type in [CompressionType::None, CompressionType::Lz4] {
for block_size in [4, 8, 16, 32, 64, 128] {
let block_size = block_size * 1_024;

Expand Down Expand Up @@ -145,11 +141,7 @@ fn encode_block(c: &mut Criterion) {
fn load_value_block_from_disk(c: &mut Criterion) {
let mut group = c.benchmark_group("Load block from disk");

for comp_type in [
CompressionType::None,
CompressionType::Lz4,
CompressionType::Miniz(3),
] {
for comp_type in [CompressionType::None, CompressionType::Lz4] {
for block_size in [4, 8, 16, 32, 64, 128] {
let block_size = block_size * 1_024;

Expand Down
Loading
Loading