6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -19,13 +19,13 @@ repos:
- id: trailing-whitespace
exclude: '.*\.patch'
- repo: https://github.com/psf/black
rev: 24.3.0
rev: 24.8.0
hooks:
- id: black
args: ["--line-length", "80"]
language_version: python3
- repo: https://github.com/PyCQA/bandit
rev: '1.7.8'
rev: '1.7.9'
hooks:
- id: bandit
args: ["-c", ".bandit.yml"]
@@ -35,7 +35,7 @@ repos:
- id: isort
name: isort (python)
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
rev: 7.1.1
hooks:
- id: flake8
- repo: https://github.com/pocc/pre-commit-hooks
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -149,6 +149,7 @@ include_directories(
${PROJECT_SOURCE_DIR}/third_party/bitsery/include
${MPI_INCLUDE_PATH}
${pybind11_INCLUDE_DIRS}
${LLVM_INCLUDE_DIRS}
${MLIR_INCLUDE_DIRS}
${IMEX_INCLUDE_DIRS})

184 changes: 184 additions & 0 deletions examples/transpose.py
@@ -0,0 +1,184 @@
"""
Transpose benchmark

Matrix transpose benchmark for sharpy and numpy backends.

Examples:

# Run 1000 iterations on a 1000x1000 matrix with the sharpy backend
python transpose.py -r 1000 -c 1000 -b sharpy -i 1000

# MPI parallel run
mpiexec -n 3 python transpose.py -r 1000 -c 1000 -b sharpy -i 1000

"""

import argparse
import time as time_mod

import numpy

import sharpy

try:
import mpi4py

mpi4py.rc.finalize = False
from mpi4py import MPI

comm_rank = MPI.COMM_WORLD.Get_rank()
comm = MPI.COMM_WORLD
except ImportError:
comm_rank = 0
comm = None


def info(s):
if comm_rank == 0:
print(s)


def sp_transpose(arr):
brr = sharpy.permute_dims(arr, [1, 0])
sharpy.sync()
Review comment (Contributor): Why do we need a sync here?

return brr


def np_transpose(arr):
brr = arr.transpose()
return brr.copy()


def initialize(np, row, col, dtype):
arr = np.arange(0, row * col, 1, dtype=dtype)
return np.reshape(arr, (row, col))


def run(row, col, backend, iterations, datatype):
if backend == "sharpy":
import sharpy as np
from sharpy import fini, init, sync

transpose = sp_transpose

init(False)
elif backend == "numpy":
import numpy as np

if comm is not None:
assert (
comm.Get_size() == 1
), "Numpy backend only supports serial execution."

fini = sync = lambda x=None: None
transpose = np_transpose
else:
raise ValueError(f'Unknown backend: "{backend}"')

dtype = {
"f32": np.float32,
"f64": np.float64,
}[datatype]

info(f"Using backend: {backend}")
info(f"Number of row: {row}")
info(f"Number of column: {col}")
info(f"Datatype: {datatype}")

arr = initialize(np, row, col, dtype)
sync()

# verify
if backend == "sharpy":
brr = sp_transpose(arr)
crr = np_transpose(sharpy.to_numpy(arr))
assert numpy.allclose(sharpy.to_numpy(brr), crr)

def eval():
tic = time_mod.perf_counter()
transpose(arr)
toc = time_mod.perf_counter()
return toc - tic

# warm-up run
t_warm = eval()

# evaluate
info(f"Running {iterations} iterations")
time_list = []
for i in range(iterations):
time_list.append(eval())

# get max time over mpi ranks
if comm is not None:
t_warm = comm.allreduce(t_warm, MPI.MAX)
time_list = comm.allreduce(time_list, MPI.MAX)

t_min = numpy.min(time_list)
t_max = numpy.max(time_list)
t_med = numpy.median(time_list)
init_overhead = t_warm - t_med
if backend == "sharpy":
info(f"Estimated initialization overhead: {init_overhead:.5f} s")
info(f"Min. duration: {t_min:.5f} s")
info(f"Max. duration: {t_max:.5f} s")
info(f"Median duration: {t_med:.5f} s")

fini()


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Run transpose benchmark",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

parser.add_argument(
"-r",
"--row",
type=int,
default=10000,
help="Number of row.",
)
parser.add_argument(
"-c",
"--column",
type=int,
default=10000,
help="Number of column.",
)

parser.add_argument(
"-b",
"--backend",
type=str,
default="sharpy",
choices=["sharpy", "numpy"],
help="Backend to use.",
)

parser.add_argument(
"-i",
"--iterations",
type=int,
default=10,
help="Number of iterations to run.",
)

parser.add_argument(
"-d",
"--datatype",
type=str,
default="f64",
choices=["f32", "f64"],
help="Datatype for model state variables",
)

args = parser.parse_args()
run(
args.row,
args.column,
args.backend,
args.iterations,
args.datatype,
)
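
The benchmark reduces the timings across ranks with comm.allreduce(time_list, MPI.MAX); in mpi4py's pickle-based (lowercase) API the MPI.MAX operation is applied by Python's max() to the lists as whole objects rather than per iteration. If an element-wise maximum over ranks is wanted, a buffer-based Allreduce is one option — a minimal sketch under that assumption, reusing only the numpy and mpi4py imports the script already has (the helper name is illustrative):

import numpy
from mpi4py import MPI

def max_over_ranks(comm, times):
    # Element-wise maximum of per-iteration timings across all MPI ranks.
    buf = numpy.asarray(times, dtype=numpy.float64)
    out = numpy.empty_like(buf)
    comm.Allreduce(buf, out, op=MPI.MAX)  # element-wise MPI_MAX on float64 buffers
    return out.tolist()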
2 changes: 1 addition & 1 deletion imex_version.txt
@@ -1 +1 @@
5a7bb80ede5fe4fa8d56ee0dd77c4e5c1327fe09
8ae485bbfb1303a414b375e25130fcaa4c02127a
6 changes: 5 additions & 1 deletion setup.py
@@ -1,3 +1,4 @@
import multiprocessing
import os
import pathlib

@@ -44,7 +45,10 @@ def build_cmake(self, ext):
os.chdir(str(build_temp))
self.spawn(["cmake", str(cwd)] + cmake_args)
if not self.dry_run:
self.spawn(["cmake", "--build", ".", "-j5"] + build_args)
self.spawn(
["cmake", "--build", ".", f"-j{multiprocessing.cpu_count()}"]
+ build_args
)
# Troubleshooting: if fail on line above then delete all possible
# temporary CMake files including "CMakeCache.txt" in top level dir.
os.chdir(str(cwd))
4 changes: 4 additions & 0 deletions sharpy/__init__.py
@@ -130,6 +130,10 @@ def _validate_device(device):
exec(
f"{func} = lambda this, shape, cp=None: ndarray(_csp.ManipOp.reshape(this._t, shape, cp))"
)
elif func == "permute_dims":
exec(
f"{func} = lambda this, axes: ndarray(_csp.ManipOp.permute_dims(this._t, axes))"
)

for func in api.api_categories["ReduceOp"]:
FUNC = func.upper()
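
For reference, a minimal usage sketch of the permute_dims binding wired up above, mirroring how examples/transpose.py uses it (assumes a working sharpy build; every call appears in that example):

import numpy
import sharpy

sharpy.init(False)
a = sharpy.reshape(sharpy.arange(0, 6, 1, dtype=sharpy.float64), (2, 3))
b = sharpy.permute_dims(a, [1, 0])  # swap the two axes: shape (2, 3) -> (3, 2)
sharpy.sync()                       # wait until the operation has actually executed
assert numpy.allclose(sharpy.to_numpy(b), sharpy.to_numpy(a).transpose())
sharpy.fini()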
1 change: 1 addition & 0 deletions sharpy/array_api.py
@@ -179,6 +179,7 @@
"roll", # (x, /, shift, *, axis=None)
"squeeze", # (x, /, axis)
"stack", # (arrays, /, *, axis=0)
"permute_dims", # (x: array, /, axes: Tuple[int, ...]) → array
],
"LinAlgOp": [
"matmul", # (x1, x2, /)
2 changes: 1 addition & 1 deletion src/EWBinOp.cpp
@@ -120,7 +120,7 @@ struct DeferredEWBinOp : public Deferred {
auto av = dm.getDependent(builder, Registry::get(_a));
auto bv = dm.getDependent(builder, Registry::get(_b));

auto aTyp = av.getType().cast<::imex::ndarray::NDArrayType>();
auto aTyp = ::mlir::cast<::imex::ndarray::NDArrayType>(av.getType());
auto outElemType =
::imex::ndarray::toMLIR(builder, SHARPY::jit::getPTDType(_dtype));
auto outTyp = aTyp.cloneWith(shape(), outElemType);
2 changes: 1 addition & 1 deletion src/EWUnyOp.cpp
@@ -105,7 +105,7 @@ struct DeferredEWUnyOp : public Deferred {
jit::DepManager &dm) override {
auto av = dm.getDependent(builder, Registry::get(_a));

auto aTyp = av.getType().cast<::imex::ndarray::NDArrayType>();
auto aTyp = ::mlir::cast<::imex::ndarray::NDArrayType>(av.getType());
auto outTyp = aTyp.cloneWith(shape(), aTyp.getElementType());

auto ndOpId = sharpy(_op);
2 changes: 1 addition & 1 deletion src/IEWBinOp.cpp
@@ -71,7 +71,7 @@ struct DeferredIEWBinOp : public Deferred {
auto av = dm.getDependent(builder, Registry::get(_a));
auto bv = dm.getDependent(builder, Registry::get(_b));

auto aTyp = av.getType().cast<::imex::ndarray::NDArrayType>();
auto aTyp = ::mlir::cast<::imex::ndarray::NDArrayType>(av.getType());
auto outTyp = aTyp.cloneWith(shape(), aTyp.getElementType());

auto binop = builder.create<::imex::ndarray::EWBinOp>(