Skip to content

Commit

Permalink
Tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
jesperes committed Jan 26, 2024
1 parent ea1a93a commit 6c85c41
Showing 1 changed file with 23 additions and 9 deletions.
32 changes: 23 additions & 9 deletions src/erlang_1brc.erl
Original file line number Diff line number Diff line change
@@ -1,17 +1,28 @@
-module(erlang_1brc).

-include_lib("stdlib/include/assert.hrl").

-feature(maybe_expr, enable).

-export([ main/1 %% Entrypoint for escript
, run/1 %% Entrypoint for run.sh
]).

-compile({inline, [ {process_temp,2}
, {process_line,3}
-compile({inline, [ {process_temp, 2}
, {process_line, 3}
, {process_station, 2}
]}).

-define(BUFSIZE, 2 * 1024 * 1024).

%% 64k seems to be the smallest buffer we can read and still get all
%% the city names. This should be computed dynamically instead.
-define(MAP_CITIES_BUFSIZE, 64 * 1024).
-define(EXPECTED_NUM_CITIES, 413).

%% Compute a compressed key one byte at a time: fold byte C into the
%% key accumulated so far (Acc).
%%
%% Both arguments and the whole expansion are parenthesized so that the
%% macro stays correct when it is given compound arguments (for example
%% A + B) or is used inside a larger expression (for example ?KEY(..) * N),
%% where operator precedence would otherwise regroup the terms.
-define(KEY(C, Acc), ((((C) * 17) bxor (Acc)) bsl 1)).

options() ->
[ {file, $f, "file", {string, "measurements.txt"}, "The input file."}
, {eprof, $e, "eprof", undefined, "Run code under eprof."}
Expand Down Expand Up @@ -52,9 +63,10 @@ run(Filename) when is_atom(Filename) ->
run(atom_to_list(Filename));
run(Filename) ->
{Time, _} = timer:tc(fun() -> map_cities(Filename) end),
NumCities = length(get()),
?assertEqual(?EXPECTED_NUM_CITIES, NumCities),
io:format("Mapped ~p citites in ~w ms~n",
[length(get()),
erlang:convert_time_unit(Time, microsecond, millisecond)]),
[NumCities, erlang:convert_time_unit(Time, microsecond, millisecond)]),
try
process_flag(trap_exit, true),
case file:open(Filename, [raw, read, binary]) of
Expand All @@ -79,16 +91,14 @@ run(Filename) ->
%% Read the first ?MAP_CITIES_BUFSIZE bytes of Filename and pass them to
%% map_cities0/2 (with 1 as its second argument).
%%
%% The file descriptor is only needed for that single read, so it is
%% closed explicitly before the mapping starts instead of being left open.
%% If the file cannot be opened, an error message is printed, flush/0 is
%% called and the VM is halted with exit status 1.
map_cities(Filename) ->
    case file:open(Filename, [raw, read, binary]) of
        {ok, FD} ->
            ReadResult = file:pread(FD, 0, ?MAP_CITIES_BUFSIZE),
            ok = file:close(FD),
            %% Assertive match: crash on eof (empty file) or a read error.
            {ok, Bin} = ReadResult,
            map_cities0(Bin, 1);
        {error, Reason} ->
            io:format("*** Failed to open ~ts: ~p~n", [Filename, Reason]),
            flush(),
            erlang:halt(1)
    end.

-define(KEY(C, Acc), ((C * 17) bxor Acc) bsl 1).

%% Compute the compressed integer key for a station name given as a
%% binary: the bytes are folded left to right with ?KEY, starting from 0.
station_key(Station) ->
    Bytes = binary_to_list(Station),
    Step = fun(Byte, Key) -> ?KEY(Byte, Key) end,
    lists:foldl(Step, 0, Bytes).
Expand Down Expand Up @@ -245,8 +255,8 @@ process_station(Station) ->

%% Consume the station-name part of a line while accumulating its key.
%%
%%  * On the ";" separator, the rest of the input and the accumulated key
%%    are handed over to process_temp/2.
%%  * On any other byte, the byte is folded into the key with ?KEY and
%%    scanning continues with the remaining input.
%%  * If neither clause matches (e.g. the input is exhausted before a ";"
%%    is seen), the remaining input is returned unchanged and the
%%    partially built key is dropped.
process_station(<<";", Rest/bitstring>>, Key) ->
    process_temp(Rest, Key);
process_station(<<C:8, Rest/bitstring>>, Key) ->
    %% Key is built incrementally, so no sub-binary for the station name
    %% is ever created.
    process_station(Rest, ?KEY(C, Key));
process_station(Bin, _) ->
    Bin.

Expand All @@ -262,6 +272,10 @@ process_temp(<<A, B, $., C, Rest/binary>>, Station) ->
process_temp(<<B, $., C, Rest/binary>>, Station) ->
process_line(Rest, Station, ?TO_NUM(B) * 10 + ?TO_NUM(C));
process_temp(Rest, Station) ->
%% This return breaks the match context reuse optimization, but it
%% is only executed at the end of each chunk, so it doesn't really
%% matter much. The first four clauses here are the hot ones, together
%% with the first two in process_station/2.
{Rest, Station}.

process_line(Rest, Key, Temp) ->
Expand Down

0 comments on commit 6c85c41

Please sign in to comment.