update rambo test

AllanZyne · AllanZyne · commit d70f155969d0 · 2024-05-15T03:22:39.000-07:00
diff --git a/examples/rambo.py b/examples/rambo.py
@@ -1,87 +1,222 @@
-import sharpy as sp
-import numpy as np
+"""Run the rambo benchmark with the sharpy or numpy backend.
 
-def sp_rambo(sp_C1, sp_F1, sp_Q1, sp_output, C1, F1, Q1, output):
+Examples:
+    python rambo.py -nevts 10 -nout 10 -b sharpy -i 10000
 
-    sp_C = 2.0 * sp_C1 - 1.0
-    sp_S = sp.sqrt(1 - sp.square(sp_C))
-    sp_F = 2.0 * sp.pi * sp_F1
-    sp_Q = -sp.log(sp_Q1)
+"""
+import argparse
+import os
+import time as time_mod
+from functools import partial
 
-    sp_output[:, :, 0] = sp.reshape(sp_Q, (10, 10, 1))
-    sp_output[:, :, 1] = sp.reshape(sp_Q * sp_S * sp.sin(sp_F), (10, 10, 1))
-    sp_output[:, :, 2] = sp.reshape(sp_Q * sp_S * sp.cos(sp_F), (10, 10, 1))
-    sp_output[:, :, 3] = sp.reshape(sp_Q * sp_C, (10, 10, 1))
+import numpy
+import sharpy
 
-    C = 2.0 * C1 - 1.0
-    S = np.sqrt(1 - np.square(C))
-    F = 2.0 * np.pi * F1
-    Q = -np.log(Q1)
+try:
+    import mpi4py
 
-    output[:, :, 0] = Q
-    output[:, :, 1] = Q * S * np.sin(F)
-    output[:, :, 2] = Q * S * np.cos(F)
-    output[:, :, 3] = Q * C
-    
-    sp.sync()
+    mpi4py.rc.finalize = False
+    from mpi4py import MPI
+
+    comm_rank = MPI.COMM_WORLD.Get_rank()
+    comm = MPI.COMM_WORLD
+except ImportError:
+    comm_rank = 0
+    comm = None
 
+def info(s):
+    if comm_rank == 0:  # only rank 0 prints, so multi-rank runs show each message once
+        print(s)
 
-def sp_initialize(nevts, nout, seed, types_dict):
-    dtype = types_dict["float"]
+def naive_erf(x):
+    """
+    Error function (erf) implementation
 
-    sp.random.seed(seed)
-    C1 = sp.random.rand(nevts, nout)
-    F1 = sp.random.rand(nevts, nout)
-    Q1 = sp.random.rand(nevts, nout) * sp.random.rand(nevts, nout)
+    Adapted from formula 7.1.26 in
+    Abramowitz and Stegun, "Handbook of Mathematical Functions", 1965.
+    """
+    y = numpy.abs(x)
 
-    sp.sync()
+    a1 = 0.254829592
+    a2 = -0.284496736
+    a3 = 1.421413741
+    a4 = -1.453152027
+    a5 = 1.061405429
+    p = 0.3275911
 
-    return (C1, F1, Q1, sp.zeros((nevts, nout, 4), dtype))
+    t = 1.0 / (1.0 + p * y)
+    f = (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t
+    return numpy.sign(x) * (1.0 - f * numpy.exp(-y * y))
 
-def np_rambo(nevts, nout, C1, F1, Q1, output):
+def sp_rambo(sp, sp_C1, sp_F1, sp_Q1, sp_output, nevts, nout):
+    """Write the four rambo components into sp_output using array module sp."""
+    sp_C = 2.0 * sp_C1 - 1.0
+    sp_S = sp.sqrt(1 - sp.square(sp_C))
+    sp_F = 2.0 * sp.pi * sp_F1
+    sp_Q = -sp.log(sp_Q1)
+    # Each component is reshaped to (nevts, nout, 1) before it is stored in its slice.
+    sp_output[:, :, 0] = sp.reshape(sp_Q, (nevts, nout, 1))
+    sp_output[:, :, 1] = sp.reshape(sp_Q * sp_S * sp.sin(sp_F), (nevts, nout, 1))
+    sp_output[:, :, 2] = sp.reshape(sp_Q * sp_S * sp.cos(sp_F), (nevts, nout, 1))
+    sp_output[:, :, 3] = sp.reshape(sp_Q * sp_C, (nevts, nout, 1))
+    sharpy.sync()  # NOTE: uses the global sharpy module, not the sp argument
+
+def np_rambo(np, C1, F1, Q1, output, nevts, nout):
     C = 2.0 * C1 - 1.0
     S = np.sqrt(1 - np.square(C))
     F = 2.0 * np.pi * F1
     Q = -np.log(Q1)
 
-    # numpy: could not broadcast input array from shape (10,10,1) into shape (10,10)
     output[:, :, 0] = Q
     output[:, :, 1] = Q * S * np.sin(F)
     output[:, :, 2] = Q * S * np.cos(F)
     output[:, :, 3] = Q * C
 
-def np_initialize(nevts, nout, seed, types_dict):
-    dtype = types_dict["float"]
-
+def initialize(np, nevts, nout, seed, dtype):
     np.random.seed(seed)
     C1 = np.random.rand(nevts, nout)
     F1 = np.random.rand(nevts, nout)
     Q1 = np.random.rand(nevts, nout) * np.random.rand(nevts, nout)
-
     return (C1, F1, Q1, np.zeros((nevts, nout, 4), dtype))
-    
-def np_run(nevts, nout, seed=42):
-    types_dict = {
-        "float": sp.float64
-    }
-    sp_C1, sp_F1, sp_Q1, sp_output = sp_initialize(nevts, nout, seed, types_dict)
-    # sp_rambo(nevts, nout, sp_C1, sp_F1, sp_Q1, sp_output)
-
-    types_dict = {
-        "float": np.float64
-    }
-    np_C1, np_F1, np_Q1, np_output = np_initialize(nevts, nout, seed, types_dict)
-
-    # assert np.allclose(sp.to_numpy(sp_C1), np_C1)
-    # assert np.allclose(sp.to_numpy(sp_F1), np_F1)
-    # assert np.allclose(sp.to_numpy(sp_Q1), np_Q1)
-
-    sp_rambo(sp_C1, sp_F1, sp_Q1, sp_output, np_C1, np_F1, np_Q1, np_output)
 
-    assert np.allclose(sp.to_numpy(sp_output), np_output)
+def run(nevts, nout, backend, iterations, datatype):
+    """Time the rambo kernel; the sharpy backend is first checked against numpy.
+
+    Args: nevts, nout -- array extents; backend -- "sharpy" or "numpy";
+    iterations -- number of timed runs; datatype -- "f32" or "f64" (output dtype).
+    Raises ValueError for an unknown backend, RuntimeError for numpy on >1 MPI rank.
+    """
+    if backend == "sharpy":
+        import sharpy as np
+        from sharpy import fini, init, sync
+
+        device = os.getenv("SHARPY_DEVICE", "")
+        create_full = partial(np.full, device=device)
+        random_rand = partial(np.random.rand, device=device)
+        erf = np.erf
+        rambo = sp_rambo
+
+        init(False)
+    elif backend == "numpy":
+        import numpy as np
+
+        if comm is not None and comm.Get_size() != 1:
+            raise RuntimeError("Numpy backend only supports serial execution.")
+
+        create_full = np.full
+        random_rand = np.random.rand
+        fini = sync = lambda x=None: None
+        erf = naive_erf
+        rambo = np_rambo
+    else:
+        raise ValueError(f'Unknown backend: "{backend}"')
+
+    dtype = {
+        "f32": np.float32,
+        "f64": np.float64,
+    }[datatype]
+
+    info(f"Using backend: {backend}")
+    info(f"Number of events: {nevts}")
+    info(f"Number of outputs: {nout}")
+    info(f"Datatype: {datatype}")
+
+    seed = 7777
+    C1, F1, Q1, output = initialize(np, nevts, nout, seed, dtype)
+    sync()
+
+    # verify
+    if backend == "sharpy":
+        sp_rambo(sharpy, C1, F1, Q1, output, nevts, nout)
+        # NOTE: an explicit sync() here reportedly does not work; sp_rambo syncs itself.
+        np_C1 = sharpy.to_numpy(C1)
+        np_F1 = sharpy.to_numpy(F1)
+        np_Q1 = sharpy.to_numpy(Q1)
+        np_output = numpy.zeros((nevts, nout, 4))
+        np_rambo(numpy, np_C1, np_F1, np_Q1, np_output, nevts, nout)
+        assert numpy.allclose(sharpy.to_numpy(output), np_output)
+
+    def timed_run():
+        tic = time_mod.perf_counter()
+        rambo(np, C1, F1, Q1, output, nevts, nout)
+        toc = time_mod.perf_counter()
+        return toc - tic
+
+    # warm-up run
+    t_warm = timed_run()
+
+    # evaluate
+    info(f"Running {iterations} iterations")
+    time_list = [timed_run() for _ in range(iterations)]
+
+    # per-iteration max over MPI ranks (allreduce on a list would compare whole lists)
+    if comm is not None:
+        t_warm = comm.allreduce(t_warm, MPI.MAX)
+        time_list = numpy.max(comm.allgather(time_list), axis=0)
+
+    t_min = numpy.min(time_list)
+    t_max = numpy.max(time_list)
+    t_med = numpy.median(time_list)
+    init_overhead = t_warm - t_med
+    if backend == "sharpy":
+        info(f"Estimated initialization overhead: {init_overhead:.5f} s")
+    info(f"Min.   duration: {t_min:.5f} s")
+    info(f"Max.   duration: {t_max:.5f} s")
+    info(f"Median duration: {t_med:.5f} s")
+
+    fini()
 
 if __name__ == "__main__":
-    sp.init(False)
-    nevts, nout = 10, 10
-    np_run(nevts, nout)
-    sp.fini()
+    parser = argparse.ArgumentParser(
+        description="Run rambo benchmark",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    # Problem size: the output array has shape (num_events, num_outputs, 4).
+    parser.add_argument(
+        "-nevts",
+        "--num_events",
+        type=int,
+        default=10,
+        help="Number of events to evaluate.",
+    )
+    parser.add_argument(
+        "-nout",
+        "--num_outputs",
+        type=int,
+        default=10,
+        help="Number of outputs to evaluate.",
+    )
+    # Array library used to run the kernel.
+    parser.add_argument(
+        "-b",
+        "--backend",
+        type=str,
+        default="sharpy",
+        choices=["sharpy", "numpy"],
+        help="Backend to use.",
+    )
+    # Number of timed runs and the floating-point type of the output array.
+    parser.add_argument(
+        "-i",
+        "--iterations",
+        type=int,
+        default=10,
+        help="Number of iterations to run.",
+    )
+    parser.add_argument(
+        "-d",
+        "--datatype",
+        type=str,
+        default="f64",
+        choices=["f32", "f64"],
+        help="Datatype for model state variables",
+    )
+    args = parser.parse_args()
+    nevts, nout = args.num_events, args.num_outputs
+    run(
+        nevts,
+        nout,
+        args.backend,
+        args.iterations,
+        args.datatype,
+    )