Skip to content

Commit 31126cc

Browse files
author
Brian Sparrow
committed
Merge pull request #87 from basho/bs-merge-expiration-change
Change behavior of merge when merging for data expiration
2 parents 2d8f1b9 + fd9a29c commit 31126cc

File tree

2 files changed

+68
-27
lines changed

2 files changed

+68
-27
lines changed

src/bitcask.erl

+65-25
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,6 @@ open(Dirname, Opts) ->
140140
{error, Reason}
141141
end.
142142

143-
144-
145-
146143
%% @doc Close a bitcask data store and flush any pending writes to disk.
147144
-spec close(reference()) -> ok.
148145
close(Ref) ->
@@ -452,34 +449,39 @@ iterator_release(Ref) ->
452449
%% into a more compact form.
453450
-spec merge(Dirname::string()) -> ok.
454451
merge(Dirname) ->
455-
merge(Dirname, [], readable_files(Dirname)).
452+
merge(Dirname, [], {readable_files(Dirname), []}).
456453

457454
%% @doc Merge several data files within a bitcask datastore
458455
%% into a more compact form.
459456
-spec merge(Dirname::string(), Opts::[_]) -> ok.
460457
merge(Dirname, Opts) ->
461-
merge(Dirname, Opts, readable_files(Dirname)).
458+
merge(Dirname, Opts, {readable_files(Dirname), []}).
462459

463460
%% @doc Merge several data files within a bitcask datastore
464461
%% into a more compact form.
465462
-spec merge(Dirname::string(), Opts::[_], FilesToMerge::[string()]) -> ok.
466463
merge(_Dirname, _Opts, []) ->
467464
ok;
468-
merge(Dirname, Opts, FilesToMerge0) ->
465+
merge(Dirname,Opts,FilesToMerge) when is_list(FilesToMerge) ->
466+
merge(Dirname,Opts,{FilesToMerge,[]});
467+
merge(_Dirname, _Opts, {[],_}) ->
468+
ok;
469+
merge(Dirname, Opts, {FilesToMerge0, ExpiredFiles0}) ->
469470
%% Make sure bitcask app is started so we can pull defaults from env
470471
ok = start_app(),
471-
472472
%% Filter the files to merge and ensure that they all exist. It's
473473
%% possible in some circumstances that we'll get an out-of-date
474474
%% list of files.
475475
FilesToMerge = [F || F <- FilesToMerge0,
476476
filelib:is_file(F)],
477-
merge1(Dirname, Opts, FilesToMerge).
477+
ExpiredFiles = [F || F <- ExpiredFiles0,
478+
filelib:is_file(F)],
479+
merge1(Dirname, Opts, FilesToMerge, ExpiredFiles).
478480

479481
%% Inner merge function, assumes that bitcask is running and all files exist.
480-
merge1(_Dirname, _Opts, []) ->
482+
merge1(_Dirname, _Opts, [], []) ->
481483
ok;
482-
merge1(Dirname, Opts, FilesToMerge) ->
484+
merge1(Dirname, Opts, FilesToMerge, ExpiredFiles) ->
483485
%% Test to see if this is a complete or partial merge
484486
Partial = not(lists:usort(readable_files(Dirname)) ==
485487
lists:usort(FilesToMerge)),
@@ -550,18 +552,22 @@ merge1(Dirname, Opts, FilesToMerge) ->
550552
TooNew = [F#file_status.filename ||
551553
F <- Summary,
552554
F#file_status.newest_tstamp >= MergeStart],
553-
InFiles = lists:reverse(
554-
lists:foldl(fun(F, Acc) ->
555+
{InFiles,InExpiredFiles} = lists:foldl(fun(F, {InFilesAcc,InExpiredAcc} = Acc) ->
555556
case lists:member(F#filestate.filename,
556557
TooNew) of
557558
false ->
558-
[F|Acc];
559+
case lists:member(F#filestate.filename,
560+
ExpiredFiles) of
561+
false ->
562+
{[F|InFilesAcc],InExpiredAcc};
563+
true ->
564+
{InFilesAcc,[F|InExpiredAcc]}
565+
end;
559566
true ->
560567
bitcask_fileops:close(F),
561568
Acc
562569
end
563-
end, [], InFiles1)),
564-
570+
end, {[],[]}, InFiles1),
565571
%% Setup our first output merge file and update the merge lock accordingly
566572
{ok, Outfile} = bitcask_fileops:create_file(Dirname, Opts),
567573
ok = bitcask_lockops:write_activefile(
@@ -585,6 +591,7 @@ merge1(Dirname, Opts, FilesToMerge) ->
585591
opts = Opts },
586592

587593
%% Finally, start the merge process
594+
ExpiredFilesFinished = expiry_merge(InExpiredFiles, LiveKeyDir, []),
588595
State1 = merge_files(State),
589596

590597
%% Make sure to close the final output file
@@ -593,11 +600,16 @@ merge1(Dirname, Opts, FilesToMerge) ->
593600

594601
%% Close the original input files, schedule them for deletion,
595602
%% close keydirs, and release our lock
596-
[bitcask_fileops:close(F) || F <- State#mstate.input_files],
603+
[bitcask_fileops:close(F) || F <- State#mstate.input_files ++ ExpiredFilesFinished],
597604
{_, _, _, {IterGeneration, _, _}} = bitcask_nifs:keydir_info(LiveKeyDir),
598-
FileNames = [F#filestate.filename || F <- State#mstate.input_files],
605+
FileNames = [F#filestate.filename || F <- State#mstate.input_files ++ ExpiredFilesFinished],
599606
[catch set_setuid_bit(F) || F <- FileNames],
600607
bitcask_merge_delete:defer_delete(Dirname, IterGeneration, FileNames),
608+
if InFiles == [] ->
609+
bitcask_fileops:delete(Outfile);
610+
true ->
611+
ok
612+
end,
601613

602614
%% Explicitly release our keydirs instead of waiting for GC
603615
bitcask_nifs:keydir_release(LiveKeyDir),
@@ -610,8 +622,8 @@ consider_for_merge(FragTrigger, DeadBytesTrigger, ExpirationGraceTime) ->
610622
fun (F) ->
611623
(F#file_status.fragmented >= FragTrigger)
612624
orelse (F#file_status.dead_bytes >= DeadBytesTrigger)
613-
orelse ( (F#file_status.oldest_tstamp > 0) %% means that the file has data
614-
andalso (F#file_status.oldest_tstamp < ExpirationGraceTime)
625+
orelse ((F#file_status.oldest_tstamp > 0) andalso %% means that the file has data
626+
(F#file_status.newest_tstamp < ExpirationGraceTime)
615627
)
616628
end.
617629

@@ -684,9 +696,17 @@ needs_merge(Ref) ->
684696
_ ->
685697
ok
686698
end,
687-
688699
FileNames = [Filename || {Filename, _Reasons} <- MergableFiles],
689-
{true, FileNames};
700+
F = fun(X) ->
701+
case X of
702+
{data_expired,_,_} ->
703+
true;
704+
_ ->
705+
false
706+
end
707+
end,
708+
ExpiredFiles = [Filename || {Filename, Reasons} <- MergableFiles, lists:any(F,Reasons)],
709+
{true, {FileNames, ExpiredFiles}};
690710
false ->
691711
false
692712
end.
@@ -732,8 +752,8 @@ small_file_threshold(Opts) ->
732752

733753
expired_threshold(Cutoff) ->
734754
fun(F) ->
735-
if F#file_status.oldest_tstamp < Cutoff ->
736-
[{oldest_tstamp, F#file_status.oldest_tstamp, Cutoff}];
755+
if F#file_status.newest_tstamp < Cutoff ->
756+
[{data_expired, F#file_status.newest_tstamp, Cutoff}];
737757
true ->
738758
[]
739759
end
@@ -1291,6 +1311,26 @@ poll_deferred_delete_queue_empty() ->
12911311
_ -> receive after 1100 -> poll_deferred_delete_queue_empty() end
12921312
end.
12931313

1314+
%% Internal merge function for cache_merge functionality.
1315+
expiry_merge([], _LiveKeyDir, Acc) ->
1316+
Acc;
1317+
1318+
expiry_merge([File | Files], LiveKeyDir, Acc0) ->
1319+
FileId = bitcask_fileops:file_tstamp(File),
1320+
Fun = fun(K, Tstamp, {Offset, _TotalSz}, Acc) ->
1321+
bitcask_nifs:keydir_remove(LiveKeyDir, K, Tstamp, FileId, Offset),
1322+
Acc
1323+
end,
1324+
case bitcask_fileops:fold_keys(File, Fun, ok, default) of
1325+
{error, Reason} ->
1326+
error_logger:error_msg("Error folding keys for ~p: ~p\n", [File#filestate.filename,Reason]),
1327+
Acc = Acc0;
1328+
_ ->
1329+
error_logger:info_msg("All keys expired in: ~p scheduling file for deletion\n", [File#filestate.filename]),
1330+
Acc = lists:append(Acc0, [File])
1331+
end,
1332+
expiry_merge(Files, LiveKeyDir, Acc).
1333+
12941334
%% ===================================================================
12951335
%% EUnit tests
12961336
%% ===================================================================
@@ -1700,9 +1740,9 @@ delete_partial_merge_test() ->
17001740
%% selective merge, hit all of the files with deletes but not
17011741
%% all of the ones with deleted data
17021742
timer:sleep(1100),
1703-
ok = merge("/tmp/bc.test.pardel",[],lists:reverse(lists:nthtail(2,
1743+
ok = merge("/tmp/bc.test.pardel",[],{lists:reverse(lists:nthtail(2,
17041744
lists:reverse(readable_files(
1705-
"/tmp/bc.test.pardel"))))),
1745+
"/tmp/bc.test.pardel")))),[]}),
17061746

17071747
%% Verify we've now only got one item left
17081748
B2 = bitcask:open("/tmp/bc.test.pardel"),

src/bitcask_merge_worker.erl

+3-2
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,14 @@ do_merge(Args) ->
132132
Start = os:timestamp(),
133133
Result = (catch apply(bitcask, merge, Args)),
134134
ElapsedSecs = timer:now_diff(os:timestamp(), Start) / 1000000,
135+
[_,_,{Pargs,_}] = Args,
135136
case Result of
136137
ok ->
137138
error_logger:info_msg("Merged ~p in ~p seconds.\n",
138-
[Args, ElapsedSecs]);
139+
[Pargs, ElapsedSecs]);
139140
{Error, Reason} when Error == error; Error == 'EXIT' ->
140141
error_logger:error_msg("Failed to merge ~p: ~p\n",
141-
[Args, Reason])
142+
[Pargs, Reason])
142143
end;
143144
false ->
144145
ok

0 commit comments

Comments
 (0)