Skip to content

Commit 2d8f1b9

Browse files
committed
Merge pull request #83 from basho/pevm-fast-fold
Improve fold speed for large files filled with small objects.
2 parents 83ec5b4 + 7094131 commit 2d8f1b9

10 files changed

+507
-176
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ ebin
44
priv/*.so
55
*.o
66
*.beam
7+
*~
8+
#*#

c_src/bitcask_nifs.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ ERL_NIF_TERM bitcask_nifs_file_pread(ErlNifEnv* env, int argc, const ERL_NIF_TER
215215
ERL_NIF_TERM bitcask_nifs_file_pwrite(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
216216
ERL_NIF_TERM bitcask_nifs_file_read(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
217217
ERL_NIF_TERM bitcask_nifs_file_write(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
218+
ERL_NIF_TERM bitcask_nifs_file_position(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
218219
ERL_NIF_TERM bitcask_nifs_file_seekbof(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
219220

220221
ERL_NIF_TERM errno_atom(ErlNifEnv* env, int error);
@@ -257,6 +258,7 @@ static ErlNifFunc nif_funcs[] =
257258
{"file_pwrite_int", 3, bitcask_nifs_file_pwrite},
258259
{"file_read_int", 2, bitcask_nifs_file_read},
259260
{"file_write_int", 2, bitcask_nifs_file_write},
261+
{"file_position_int", 2, bitcask_nifs_file_position},
260262
{"file_seekbof_int", 1, bitcask_nifs_file_seekbof}
261263
};
262264

@@ -1601,6 +1603,33 @@ ERL_NIF_TERM bitcask_nifs_file_write(ErlNifEnv* env, int argc, const ERL_NIF_TER
16011603
}
16021604
}
16031605

1606+
ERL_NIF_TERM bitcask_nifs_file_position(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
1607+
{
1608+
bitcask_file_handle* handle;
1609+
unsigned long offset_ul;
1610+
1611+
if (enif_get_resource(env, argv[0], bitcask_file_RESOURCE, (void**)&handle) &&
1612+
enif_get_ulong(env, argv[1], &offset_ul))
1613+
{
1614+
1615+
off_t offset = offset_ul;
1616+
off_t new_offset = lseek(handle->fd, offset, SEEK_SET);
1617+
if (new_offset != -1)
1618+
{
1619+
return enif_make_tuple2(env, ATOM_OK, enif_make_ulong(env, new_offset));
1620+
}
1621+
else
1622+
{
1623+
/* Seek failed altogether */
1624+
return enif_make_tuple2(env, ATOM_ERROR, errno_atom(env, errno));
1625+
}
1626+
}
1627+
else
1628+
{
1629+
return enif_make_badarg(env);
1630+
}
1631+
}
1632+
16041633
ERL_NIF_TERM bitcask_nifs_file_seekbof(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
16051634
{
16061635
bitcask_file_handle* handle;

include/bitcask.hrl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,8 @@
3737
-define(MAXKEYSIZE, 2#1111111111111111).
3838
-define(MAXVALSIZE, 2#11111111111111111111111111111111).
3939
-define(MAXOFFSET, 16#ffffffffffffffff). % max 64-bit unsigned
40+
41+
%% for hintfile validation
42+
-define(CHUNK_SIZE, 65535).
43+
-define(MIN_CHUNK_SIZE, 1024).
44+
-define(MAX_CHUNK_SIZE, 134217728).

src/bitcask.erl

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -315,21 +315,27 @@ fold_keys(Ref, Fun, Acc0, MaxAge, MaxPut) ->
315315

316316
%% @doc fold over all K/V pairs in a bitcask datastore.
317317
%% Fun is expected to take F(K,V,Acc0) -> Acc
318-
-spec fold(reference(), fun((binary(), binary(), any()) -> any()), any()) -> any() | {error, any()}.
319-
fold(Ref, Fun, Acc0) ->
318+
-spec fold(reference() | record(),
319+
fun((binary(), binary(), any()) -> any()),
320+
any()) -> any() | {error, any()}.
321+
fold(Ref, Fun, Acc0) when is_reference(Ref)->
320322
State = get_state(Ref),
323+
fold(State, Fun, Acc0);
324+
fold(State, Fun, Acc0) ->
321325
MaxAge = get_opt(max_fold_age, State#bc_state.opts) * 1000, % convert from ms to us
322326
MaxPuts = get_opt(max_fold_puts, State#bc_state.opts),
323-
fold(Ref, Fun, Acc0, MaxAge, MaxPuts).
327+
fold(State, Fun, Acc0, MaxAge, MaxPuts).
324328

325329
%% @doc fold over all K/V pairs in a bitcask datastore specifying max age/updates of
326330
%% the frozen keystore.
327331
%% Fun is expected to take F(K,V,Acc0) -> Acc
328-
-spec fold(reference(), fun((binary(), binary(), any()) -> any()), any(),
329-
non_neg_integer() | undefined, non_neg_integer() | undefined) ->
332+
-spec fold(reference() | record(), fun((binary(), binary(), any()) -> any()), any(),
333+
non_neg_integer() | undefined, non_neg_integer() | undefined) ->
330334
any() | {error, any()}.
331-
fold(Ref, Fun, Acc0, MaxAge, MaxPut) ->
335+
fold(Ref, Fun, Acc0, MaxAge, MaxPut) when is_reference(Ref)->
332336
State = get_state(Ref),
337+
fold(State, Fun, Acc0, MaxAge, MaxPut);
338+
fold(State, Fun, Acc0, MaxAge, MaxPut) ->
333339
FrozenFun =
334340
fun() ->
335341
case open_fold_files(State#bc_state.dirname, 3) of
@@ -853,13 +859,15 @@ scan_key_files([Filename | Rest], KeyDir, Acc, CloseFile, EnoentOK) ->
853859
%% Restrictive pattern matching below is intentional
854860
case bitcask_fileops:open_file(Filename) of
855861
{ok, File} ->
862+
FileTstamp = bitcask_fileops:file_tstamp(File),
856863
F = fun(K, Tstamp, {Offset, TotalSz}, _) ->
857864
bitcask_nifs:keydir_put(KeyDir,
858865
K,
859-
bitcask_fileops:file_tstamp(File),
866+
FileTstamp,
860867
TotalSz,
861868
Offset,
862-
Tstamp)
869+
Tstamp,
870+
false)
863871
end,
864872
bitcask_fileops:fold_keys(File, F, undefined, recovery),
865873
if CloseFile == true ->

src/bitcask_file.erl

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ file_read(Pid, Size) ->
6464
file_write(Pid, Bytes) ->
6565
file_request(Pid, {file_write, Bytes}).
6666

67+
file_position(Pid, Position) ->
68+
file_request(Pid, {file_position, Position}).
69+
6770
file_seekbof(Pid) ->
6871
file_request(Pid, file_seekbof).
6972

@@ -106,11 +109,11 @@ handle_call({file_open, Owner, Filename, Opts}, _From, State) ->
106109
IsReadOnly = proplists:get_bool(readonly, Opts),
107110
Mode = case {IsReadOnly, IsCreate} of
108111
{true, _} ->
109-
[read, raw, binary, read_ahead];
112+
[read, raw, binary];
110113
{_, false} ->
111-
[read, write, raw, binary, read_ahead];
114+
[read, write, raw, binary];
112115
{_, true} ->
113-
[read, write, exclusive, raw, binary, read_ahead]
116+
[read, write, exclusive, raw, binary]
114117
end,
115118
[warn("Bitcask file option '~p' not supported~n", [Opt])
116119
|| Opt <- [o_sync],
@@ -146,6 +149,10 @@ handle_call({file_write, Bytes}, From, State=#state{fd=Fd}) ->
146149
check_owner(From, State),
147150
Reply = file:write(Fd, Bytes),
148151
{reply, Reply, State};
152+
handle_call({file_position, Position}, From, State=#state{fd=Fd}) ->
153+
check_owner(From, State),
154+
Reply = file:position(Fd, Position),
155+
{reply, Reply, State};
149156
handle_call(file_seekbof, From, State=#state{fd=Fd}) ->
150157
check_owner(From, State),
151158
{ok, _} = file:position(Fd, bof),

0 commit comments

Comments
 (0)