Skip to content

Commit 8599e2f

Browse files
authored
add PersistentDict based on a HAMT (#51164)
The implementation is based on a [Hash Array Mapped Trie (HAMT)](https://en.wikipedia.org/wiki/Hash_array_mapped_trie) following [Bagwell (2000)](http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf). A HAMT uses a fixed branching factor (commonly 32) together with each node being sparse. In order to search for an entry we take the hash of the key and chunk it up into blocks; with a branching factor of 32, each block is 5 bits. We use those 5 bits to calculate the index inside the node and use a bitmap within the node to keep track of whether an element is already set. This makes search a `log(32, n)` operation. Persistency is implemented by path-copying. When we insert/delete a value into the HAMT we copy each node along the path into a new HAMT; all other nodes are shared with the previous HAMT. A notable implementation choice is that I didn't add a (resizeable) root table. Normally this root table is dense and uses the first `t` bits to calculate an index within. This makes large HAMTs a bit cheaper since the root table effectively folds multiple lookup steps into one. It does hurt persistent use-cases since path-copying means that we also copy the root node/table. Importantly, the HAMT itself is not immutable/persistent; the use of it as part of the `PersistentDict` is. Direct mutation of the underlying data breaks the persistency invariants. One could use the HAMT to implement a non-persistent dictionary (or other data structures). As an interesting side-note, we could use a related data structure, the [Ctrie](http://lamp.epfl.ch/~prokopec/ctries-snapshot.pdf), to implement a concurrent lock-free dictionary. Ctries also support `O(1)` snapshotting, so we could replace the HAMT used here with a Ctrie.
1 parent 27fa5de commit 8599e2f

File tree

4 files changed

+527
-0
lines changed

4 files changed

+527
-0
lines changed

base/dict.jl

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,3 +869,145 @@ empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
869869
_similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V} = empty(c, K, V)
870870
_similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T} =
871871
throw(ArgumentError("for AbstractDicts, similar requires an element type of Pair;\n if calling map, consider a comprehension instead"))
872+
873+
874+
include("hamt.jl")
875+
using .HashArrayMappedTries
876+
const HAMT = HashArrayMappedTries
877+
878+
struct PersistentDict{K,V} <: AbstractDict{K,V}
    # Underlying hash array mapped trie that stores the entries.
    trie::HAMT.HAMT{K,V}
end

"""
    PersistentDict

`PersistentDict` is a dictionary implemented as a hash array mapped trie,
which is optimal for situations where you need persistence: each operation
returns a new dictionary separate from the previous one, but the underlying
implementation is space-efficient and may share storage across multiple
separate dictionaries.

    PersistentDict(KV::Pair)

# Examples

```jldoctest
julia> dict = Base.PersistentDict(:a=>1)
Base.PersistentDict{Symbol, Int64} with 1 entry:
  :a => 1

julia> dict2 = Base.delete(dict, :a)
Base.PersistentDict{Symbol, Int64}()

julia> dict3 = Base.PersistentDict(dict, :a=>2)
Base.PersistentDict{Symbol, Int64} with 1 entry:
  :a => 2
```
"""
PersistentDict
909+
910+
# Build an empty dictionary backed by a fresh, empty trie.
function PersistentDict{K,V}() where {K,V}
    return PersistentDict(HAMT.HAMT{K,V}())
end

# Build a single-entry dictionary from one `key => value` pair.
function PersistentDict(KV::Pair{K,V}) where {K,V}
    return PersistentDict(HAMT.HAMT(KV.first, KV.second))
end

# Return a new dictionary equal to `dict` with `pair` inserted; forwards to the
# `(dict, key, val)` method.
function PersistentDict(dict::PersistentDict, pair::Pair)
    return PersistentDict(dict, pair.first, pair.second)
end
913+
# Return a new `PersistentDict` containing the entries of `dict` plus
# `key => val`.
#
# `HAMT.path` is called with its `persistent` flag set to `true`; the `top`
# trie it hands back (not `dict.trie`) becomes the storage of the result.
function PersistentDict(dict::PersistentDict{K,V}, key::K, val::V) where {K,V}
    found, present, node, idx, bitidx, top, hashstate =
        HAMT.path(dict.trie, key, hash(key), #=persistent=# true)
    # Write the value into the node located by `path`.
    HAMT.insert!(found, present, node, idx, bitidx, hashstate, val)
    return PersistentDict(top)
end
920+
921+
# Build a dictionary from one or more pairs: start from the single-pair
# dictionary for `kv`, then insert each remaining pair in order, rebinding the
# accumulator to the newly returned dictionary each time.
function PersistentDict(kv::Pair, rest::Pair...)
    acc = PersistentDict(kv)
    for (k, v) in rest
        acc = PersistentDict(acc, k, v)
    end
    return acc
end
929+
930+
# Iterating a `PersistentDict{K,V}` instance yields `Pair{K,V}` elements.
function eltype(::PersistentDict{K,V}) where {K,V}
    return Pair{K,V}
end
931+
932+
# Test whether `dict` contains the entry `key_val`.
#
# Arguments:
# - `key_val`: the `key => value` pair to look for.
# - `dict`: the dictionary to search.
# - `valcmp`: two-argument comparator called as `valcmp(value, stored_value)`;
#   defaults to `==`.
#
# Returns `false` when the key is absent; otherwise returns the result of
# `valcmp` unchanged. The previous `return valcmp(...) && return true` form was
# a redundant nested return that forced the comparator result into a boolean
# context, so a non-`Bool` result such as `missing` raised a `TypeError`.
function in(key_val::Pair{K,V}, dict::PersistentDict{K,V}, valcmp=(==)) where {K,V}
    trie = dict.trie
    # Fast path: nothing is stored at the top level, so nothing can match.
    if HAMT.islevel_empty(trie)
        return false
    end

    key, val = key_val

    h = hash(key)
    # `path` hands back the node and the slot index where `key` would live.
    found, present, trie, i, _, _, _ = HAMT.path(trie, key, h)
    if found && present
        leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V}
        return valcmp(val, leaf.val)
    end
    return false
end
948+
949+
# Report whether `key` is stored in `dict`: true exactly when both the `found`
# and `present` flags returned by `HAMT.path` are true.
function haskey(dict::PersistentDict{K}, key::K) where K
    # Only the first two of the values returned by `path` are needed here.
    found, present = HAMT.path(dict.trie, key, hash(key))
    return found && present
end
955+
956+
# Return the value stored under `key`, throwing `KeyError(key)` when the
# dictionary is empty or the key is not present.
function getindex(dict::PersistentDict{K,V}, key::K) where {K,V}
    root = dict.trie
    HAMT.islevel_empty(root) && throw(KeyError(key))

    found, present, node, idx, _, _, _ = HAMT.path(root, key, hash(key))
    (found && present) || throw(KeyError(key))

    # The slot located by `path` holds the leaf for `key`.
    leaf = @inbounds node.data[idx]::HAMT.Leaf{K,V}
    return leaf.val
end
969+
970+
# Return the value stored under `key`, or `default` when the dictionary is
# empty or the key is not present.
function get(dict::PersistentDict{K,V}, key::K, default::V) where {K,V}
    root = dict.trie
    HAMT.islevel_empty(root) && return default

    found, present, node, idx, _, _, _ = HAMT.path(root, key, hash(key))
    if !(found && present)
        return default
    end

    # The slot located by `path` holds the leaf for `key`.
    leaf = @inbounds node.data[idx]::HAMT.Leaf{K,V}
    return leaf.val
end
983+
984+
# Return the value stored under `key`, or the result of calling `default()`
# when the key is not present.
#
# Arguments:
# - `default`: zero-argument callable, invoked only when the key is missing.
# - `dict`: the dictionary to search.
# - `key`: the key to look up.
#
# The empty-trie fast path previously returned the callable `default` itself
# rather than its result, so a lookup on an empty dictionary handed the
# function back to the caller. Both miss paths now call `default()`.
function get(default::Callable, dict::PersistentDict{K,V}, key::K) where {K,V}
    trie = dict.trie
    if HAMT.islevel_empty(trie)
        return default()
    end
    h = hash(key)
    # `path` hands back the node and the slot index where `key` would live.
    found, present, trie, i, _, _, _ = HAMT.path(trie, key, h)
    if found && present
        leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V}
        return leaf.val
    end
    return default()
end
997+
998+
# Iteration is delegated to the underlying trie; `state` is passed through
# unchanged and defaults to `nothing` for the first step.
function iterate(dict::PersistentDict, state=nothing)
    return HAMT.iterate(dict.trie, state)
end
999+
1000+
# Return a new `PersistentDict` without `key`.
#
# `HAMT.path` is called with its `persistent` flag set to `true`; the result is
# always built from the `top` trie it hands back, whether or not `key` was
# present.
function delete(dict::PersistentDict{K}, key::K) where K
    found, present, node, idx, bitidx, top, _ =
        HAMT.path(dict.trie, key, hash(key), #=persistent=# true)
    # Nothing to remove: wrap the trie returned by `path` as-is.
    (found && present) || return PersistentDict(top)

    # Drop the entry's slot first, then clear its bit in the node.
    deleteat!(node.data, idx)
    HAMT.unset!(node, bitidx)
    return PersistentDict(top)
end
1010+
1011+
# Number of entries, as reported by the underlying trie.
function length(dict::PersistentDict)
    return HAMT.length(dict.trie)
end

# Whether the dictionary holds no entries, as reported by the underlying trie.
function isempty(dict::PersistentDict)
    return HAMT.isempty(dict.trie)
end

# Create an empty `PersistentDict` with the requested key and value types.
function empty(::PersistentDict, ::Type{K}, ::Type{V}) where {K, V}
    return PersistentDict{K, V}()
end

0 commit comments

Comments
 (0)