diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..df53f7d --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +.rebar3 +_build +_checkouts +_vendor +.eunit +*.o +*.beam +*.plt +*.swp +*.swo +.erlang.cookie +ebin +log +erl_crash.dump +.rebar +logs +.idea +*.iml +rebar3.crashdump +*~ diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..ca13855 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,186 @@ +# Apache License +Version 2.0, January 2004 + +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +## 1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. 
+ +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +## 2. Grant of Copyright License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +## 3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +## 4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +1. You must give any other recipients of the Work or Derivative Works a copy of + this License; and + +2. You must cause any modified files to carry prominent notices stating that + You changed the files; and + +3. 
You must retain, in the Source form of any Derivative Works that You + distribute, all copyright, patent, trademark, and attribution notices from + the Source form of the Work, excluding those notices that do not pertain to + any part of the Derivative Works; and + +4. If the Work includes a "NOTICE" text file as part of its distribution, then + any Derivative Works that You distribute must include a readable copy of the + attribution notices contained within such NOTICE file, excluding those + notices that do not pertain to any part of the Derivative Works, in at least + one of the following places: within a NOTICE text file distributed as part + of the Derivative Works; within the Source form or documentation, if + provided along with the Derivative Works; or, within a display generated by + the Derivative Works, if and wherever such third-party notices normally + appear. The contents of the NOTICE file are for informational purposes only + and do not modify the License. You may add Your own attribution notices + within Derivative Works that You distribute, alongside or as an addendum to + the NOTICE text from the Work, provided that such additional attribution + notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +## 5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. 
+Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +## 6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +## 7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, NON- +INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +## 8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +## 9. Accepting Warranty or Additional Liability. 
+
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+Copyright 2024, Jesper Eskilson .
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..12b5852
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+erlang_1brc
+=====
+
+An escript
+
+Build
+-----
+
+    $ rebar3 escriptize
+
+Run
+---
+
+    $ _build/default/bin/erlang_1brc
diff --git a/rebar.config b/rebar.config
new file mode 100644
index 0000000..83e3f2e
--- /dev/null
+++ b/rebar.config
@@ -0,0 +1,15 @@
+%% The default profile compiles without debug info.
+{erl_opts, [no_debug_info]}.
+{deps, [getopt]}.
+
+%% Applications packed into the escript.
+{escript_incl_apps, [erlang_1brc, getopt]}.
+{escript_main_app, erlang_1brc}.
+{escript_name, erlang_1brc}.
+%% Emulator arguments line placed in the escript header.
+{escript_emu_args, "%%! +sbtu +A1\n"}.
+
+%% Profiles
+{profiles, [{test,
+             [{erl_opts, [debug_info]}
+             ]}]}.
diff --git a/rebar.lock b/rebar.lock
new file mode 100644
index 0000000..e45ff61
--- /dev/null
+++ b/rebar.lock
@@ -0,0 +1,8 @@
+{"1.2.0",
+[{<<"getopt">>,{pkg,<<"getopt">>,<<"1.0.3">>},0}]}.
+[
+{pkg_hash,[
+ {<<"getopt">>, <<"4F3320C1F6F26B2BEC0F6C6446B943EB927A1E6428EA279A1C6C534906EE79F1">>}]},
+{pkg_hash_ext,[
+ {<<"getopt">>, <<"7E01DE90AC540F21494FF72792B1E3162D399966EBBFC674B4CE52CB8F49324F">>}]}
+].
diff --git a/src/aggregate.erl b/src/aggregate.erl
new file mode 100644
index 0000000..3f20dd0
--- /dev/null
+++ b/src/aggregate.erl
@@ -0,0 +1,78 @@
+%% Streams a measurements file, chunk by chunk, to a linked processor process.
+-module(aggregate).
+
+-export([ aggregate_measurements/2
+        , chunk_processor/0
+        ]).
+
+%% Reads Filename in chunks of `bufsize' bytes (looked up in the proplist Opts),
+%% sends every chunk to a freshly started chunk processor and then waits for
+%% that processor to terminate.
+%%
+%% Exits are trapped so that the processor's termination arrives as an 'EXIT'
+%% message, which wait_for_completion/1 consumes.
+aggregate_measurements(Filename, Opts) ->
+    process_flag(trap_exit, true),
+    Start = erlang:monotonic_time(),
+    BufSize = proplists:get_value(bufsize, Opts),
+    {ok, FD} = prim_file:open(Filename, [read]),
+    ProcessorPid = proc_lib:start_link(?MODULE, chunk_processor, []),
+    %% Always release the file descriptor, even if reading crashes.
+    try
+        read_chunks(FD, BufSize, ProcessorPid)
+    after
+        prim_file:close(FD)
+    end,
+    Now = erlang:monotonic_time(),
+    io:format("All chunks read after ~p secs, waiting for chunk processor~n",
+              [erlang:convert_time_unit(Now - Start, native, second)]),
+    wait_for_completion(ProcessorPid).
+
+%% Reads FD until eof, forwarding each chunk to TargetPid, then sends `eof'
+%% so the receiver knows no more chunks will follow.
+read_chunks(FD, BufSize, TargetPid) ->
+    case prim_file:read(FD, BufSize) of
+        {ok, Bin} ->
+            process_chunk(Bin, TargetPid),
+            read_chunks(FD, BufSize, TargetPid);
+        eof ->
+            TargetPid ! eof,
+            ok
+    end.
+
+%% Logs the chunk size and sends the chunk to TargetPid as {chunk, Chunk}.
+process_chunk(Chunk, TargetPid) ->
+    io:format("Processing chunk of size ~p, sending to ~p~n", [byte_size(Chunk), TargetPid]),
+    TargetPid ! {chunk, Chunk}.
+
+%% Entry point of the chunk processor; started via proc_lib:start_link/3.
+%% Acknowledges with its own pid, which is what start_link then returns.
+chunk_processor() ->
+    proc_lib:init_ack(self()),
+    io:format("Started chunk processor: ~p~n", [self()]),
+    chunk_processor_loop().
+
+%% Receives chunks until `eof' arrives, then returns so the process exits
+%% normally. Chunks are only logged for now; unknown messages are logged
+%% and dropped.
+chunk_processor_loop() ->
+    receive
+        {chunk, Chunk} ->
+            io:format("Chunk processor received ~p~n", [byte_size(Chunk)]),
+            chunk_processor_loop();
+        eof ->
+            %% No more chunks to process
+            ok;
+        Other ->
+            io:format("Other: ~p~n", [Other]),
+            chunk_processor_loop()
+    end.
+
+%% Blocks until ProcessorPid terminates. A normal exit is reported; any other
+%% exit reason is re-raised here instead of being logged and waited on forever.
+wait_for_completion(ProcessorPid) ->
+    receive
+        {'EXIT', ProcessorPid, normal} -> io:format("Processor pid finished.~n", []);
+        {'EXIT', ProcessorPid, Reason} -> exit({chunk_processor_failed, Reason});
+        X -> io:format(">>> ~p~n", [X]), wait_for_completion(ProcessorPid)
+    end.
diff --git a/src/erlang_1brc.app.src b/src/erlang_1brc.app.src
new file mode 100644
index 0000000..490db75
--- /dev/null
+++ b/src/erlang_1brc.app.src
@@ -0,0 +1,14 @@
+{application, erlang_1brc,
+ [{description, "An escript"},
+  {vsn, "0.1.0"},
+  {registered, []},
+  {applications,
+   [kernel,
+    stdlib
+   ]},
+  {env,[]},
+  {modules, []},
+
+  {licenses, ["Apache-2.0"]},
+  {links, []}
+ ]}.
diff --git a/src/erlang_1brc.erl b/src/erlang_1brc.erl
new file mode 100644
index 0000000..b6655f9
--- /dev/null
+++ b/src/erlang_1brc.erl
@@ -0,0 +1,116 @@
+%% Escript entry point: parses the command line and runs either the
+%% measurement aggregation or one of the I/O benchmarks.
+-module(erlang_1brc).
+
+-export([main/1]).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% getopt option specifications for the command line.
+options() ->
+    [ {file, $f, "file", {string, "measurements.txt"}, "The input file."}
+    , {io_bench, $i, "io-bench", string, "Perform I/O benchmarking"}
+    , {repeat, $n, "repeat", {integer, 1}, "Number of iterations."}
+    , {eprof, $e, "eprof", undefined, "Run code under eprof."}
+    , {bufsize, $c, "bufsize", {integer, 4096}, "Buffer size."}
+    ].
+
+%% Entry point called with the raw command-line arguments.
+%%
+%% On a successful parse the selected job is run `repeat' times and its
+%% average time per iteration is printed. Parse errors, stray positional
+%% arguments and a non-positive repeat count are reported with the usage text.
+main(Args) ->
+    case getopt:parse(options(), Args) of
+        {ok, {Opts, []}} -> run(Opts, proplists:get_value(repeat, Opts));
+        {ok, {_Opts, Extra}} -> usage_error({unexpected_arguments, Extra});
+        {error, Reason} -> usage_error(Reason)
+    end.
+
+%% Runs the job Iters times, optionally under eprof, and prints the summary.
+%% bench/2 divides by Iters, so anything below 1 is rejected up front.
+run(_Opts, Iters) when Iters < 1 ->
+    usage_error({invalid_repeat, Iters});
+run(Opts, Iters) ->
+    Job = fun() -> do_main(Opts) end,
+    Time =
+        case proplists:get_value(eprof, Opts) of
+            true -> profiled(Job, Iters);
+            _ -> bench(Job, Iters)
+        end,
+    %% Time is the per-iteration average returned by bench/2; 16#b5 is the
+    %% micro sign, printed through ~tc.
+    io:format("Total elapsed time: ~w ~tcs (~w seconds) (~w iterations)~n",
+              [Time, 16#b5,
+               erlang:convert_time_unit(Time, microsecond, second),
+               Iters]).
+
+%% Same as bench/2, but profiles the calling process with eprof and prints
+%% the analysis before returning the measured time.
+profiled(Job, Iters) ->
+    eprof:start(),
+    eprof:start_profiling([self()]),
+    T = bench(Job, Iters),
+    eprof:stop_profiling(),
+    eprof:analyze(),
+    eprof:stop(),
+    T.
+
+%% Reports a command-line problem, then shows the usage text.
+usage_error(Reason) ->
+    io:format("Failed to parse options: ~p~n", [Reason]),
+    %% getopt:usage/2 prints the text itself and returns ok; wrapping it in
+    %% io:format("~p~n", ...) only printed a stray "ok".
+    getopt:usage(options(), escript:script_name()).
+
+%% Runs one iteration: the aggregation, or the I/O benchmark named by --io-bench.
+%% list_to_existing_atom/1 stops arbitrary CLI input from creating new atoms.
+do_main(Opts) ->
+    Filename = proplists:get_value(file, Opts),
+    case proplists:get_value(io_bench, Opts, false) of
+        false -> aggregate:aggregate_measurements(Filename, Opts);
+        Type -> do_io_bench(list_to_existing_atom(Type), Opts, Filename)
+    end.
+
+%% I/O benchmarks, selected by name; each reads Filename in a different way.
+do_io_bench(readfile, _Opts, Filename) ->
+    {ok, _Bin} = file:read_file(Filename);
+do_io_bench(primfileread, _Opts, Filename) ->
+    Size = filelib:file_size(Filename),
+    {ok, FD} = prim_file:open(Filename, [read]),
+    {ok, _Bin} = prim_file:read(FD, Size),
+    prim_file:close(FD);
+do_io_bench(primfile_chunks, Opts, Filename) ->
+    read_chunks(Filename, default, Opts);
+do_io_bench(primfile_chunks_iterate, Opts, Filename) ->
+    read_chunks(Filename, iterate, Opts);
+do_io_bench(iterate, _Opts, Filename) ->
+    {ok, Bin} = file:read_file(Filename),
+    iterate(Bin).
+
+%% Runs Fun() Iters times and returns the average time per run in whole microseconds.
+bench(Fun, Iters) ->
+    L = lists:seq(1, Iters),
+    {Time, _} = timer:tc(fun() -> [Fun() || _ <- L] end),
+    Time div Iters.
+
+%% Walks Bin one byte at a time and returns ok once it is exhausted.
+iterate(<<>>) -> ok;
+iterate(<<_, Rest/binary>>) -> iterate(Rest).
+
+%% Opens Filename, reads it to eof in bufsize-byte chunks and always closes it.
+read_chunks(Filename, Type, Opts) ->
+    {ok, FD} = prim_file:open(Filename, [read]),
+    try do_read_chunks(FD, Type, Opts) after prim_file:close(FD) end.
+
+%% Type default discards each chunk; Type iterate walks its bytes. The chunk is
+%% consumed before the tail call, so chunks do not pile up on the stack until eof.
+do_read_chunks(FD, Type, Opts) ->
+    BufSize = proplists:get_value(bufsize, Opts),
+    case prim_file:read(FD, BufSize) of
+        eof -> ok;
+        {ok, _Bin} when Type =:= default -> do_read_chunks(FD, Type, Opts);
+        {ok, Bin} when Type =:= iterate ->
+            iterate(Bin),
+            do_read_chunks(FD, Type, Opts)
+    end.