IntelPython
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 27 additions & 77 deletions
diff --git a/examples/stencil-2d.py b/examples/stencil-2d.py
Lines changed: 3 additions & 3 deletions
diff --git a/export-ddpt_rt.txt b/export-ddpt_rt.txt
Lines changed: 5 additions & 0 deletions
diff --git a/scripts/code_gen.py b/scripts/code_gen.py
Lines changed: 3 additions & 1 deletion
diff --git a/src/CollComm.cpp b/src/CollComm.cpp
Lines changed: 3 additions & 0 deletions
diff --git a/src/Creator.cpp b/src/Creator.cpp
Lines changed: 16 additions & 10 deletions
diff --git a/src/DDPTensorImpl.cpp b/src/DDPTensorImpl.cpp
Lines changed: 3 additions & 0 deletions
diff --git a/src/Deferred.cpp b/src/Deferred.cpp
Lines changed: 5 additions & 4 deletions
diff --git a/src/EWBinOp.cpp b/src/EWBinOp.cpp
Lines changed: 20 additions & 8 deletions
@@ -68,7 +68,7 @@ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
 #include(AddIMEX)
 
 # macro for mlir root directory
-add_compile_definitions(CMAKE_MLIR_ROOT="${MLIR_ROOT}")
+add_compile_definitions(CMAKE_MLIR_ROOT="${MLIR_ROOT}" CMAKE_IMEX_ROOT="${IMEX_ROOT}")
 
 #find_package(OpenMP)
 
@@ -106,17 +106,18 @@ set(DDPTSrcs
     ${PROJECT_SOURCE_DIR}/src/Random.cpp
     ${PROJECT_SOURCE_DIR}/src/ReduceOp.cpp
     ${PROJECT_SOURCE_DIR}/src/SetGetItem.cpp
+    ${PROJECT_SOURCE_DIR}/src/jit/mlir.cpp
+    ${PROJECT_SOURCE_DIR}/src/Service.cpp
+    ${PROJECT_SOURCE_DIR}/src/Deferred.cpp
 )
 set(RTSrcs
+    ${PROJECT_SOURCE_DIR}/src/Mediator.cpp
+    ${PROJECT_SOURCE_DIR}/src/MPIMediator.cpp
     ${PROJECT_SOURCE_DIR}/src/CollComm.cpp
     ${PROJECT_SOURCE_DIR}/src/DDPTensorImpl.cpp
-    ${PROJECT_SOURCE_DIR}/src/Deferred.cpp
     ${PROJECT_SOURCE_DIR}/src/Factory.cpp
-    ${PROJECT_SOURCE_DIR}/src/Mediator.cpp
-    ${PROJECT_SOURCE_DIR}/src/MPIMediator.cpp
     ${PROJECT_SOURCE_DIR}/src/Registry.cpp
-    ${PROJECT_SOURCE_DIR}/src/Service.cpp
-    ${PROJECT_SOURCE_DIR}/src/jit/mlir.cpp
+    ${PROJECT_SOURCE_DIR}/src/_deferred.cpp
 )
 set(IDTRSrcs
     ${PROJECT_SOURCE_DIR}/src/idtr.cpp
@@ -143,13 +144,17 @@ include_directories(
 
 if (CMAKE_SYSTEM_NAME STREQUAL Linux)
   target_link_options(_ddptensor PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/export.txt")
+  target_link_options(_ddpt_rt PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/export-ddpt_rt.txt")
+  # target_link_options(idtr PRIVATE "LINKER:-fvisibility=hidden" "LINKER:--exclude-libs,All")
 endif()
 
 #compile_options(_ddptensor PRIVATE -fopenmp)
-get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
-get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
-get_property(mlir_all_libs GLOBAL PROPERTY MLIR_ALL_LIBS)
-get_property(imex_all_libs GLOBAL PROPERTY IMEX_ALL_LIBS)
+get_property(mlir_dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+get_property(mlir_conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
+get_property(mlir_extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
+get_property(mlir_translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)
+get_property(imex_dialect_libs GLOBAL PROPERTY IMEX_DIALECT_LIBS)
+get_property(imex_conversion_libs GLOBAL PROPERTY IMEX_CONVERSION_LIBS)
 
 #llvm_update_compile_flags(_ddpttensor)
 target_link_directories(_ddptensor PRIVATE ${CONDA_PREFIX}/lib)
@@ -159,6 +164,18 @@ target_link_directories(idtr PRIVATE ${CONDA_PREFIX}/lib)
 target_link_libraries(_ddptensor PRIVATE
     # ${MKL_LIBRARIES}
     # tbb
+    ${mlir_dialect_libs}        # MLIR library lists read from the MLIR_* global properties above
+    ${mlir_conversion_libs}
+    ${mlir_extension_libs}
+    ${mlir_translation_libs}
+    MLIROptLib
+    MLIRExecutionEngine
+    ${imex_dialect_libs}        # IMEX library lists read from the IMEX_* global properties above
+    ${imex_conversion_libs}
+    IMEXTransforms
+    IMEXUtil
+    LLVM${LLVM_NATIVE_ARCH}CodeGen    # host backend instead of hard-coded X86; LLVM_NATIVE_ARCH is exported by LLVMConfig.cmake (TODO confirm it is defined here)
+    LLVM${LLVM_NATIVE_ARCH}AsmParser  # expands to the same LLVMX86* names on x86 hosts
     _ddpt_rt
     idtr
 )
@@ -171,71 +188,4 @@ target_link_libraries(_ddpt_rt PRIVATE
     ${MPI_C_LIBRARIES}
     # ${MKL_LIBRARIES}
     tbb
-    IMEXPTensorDialect
-    IMEXPTensorTransforms
-    IMEXPTensorToLinalg
-    IMEXDistDialect
-    IMEXDistTransforms
-    IMEXDistToStandard
-    IMEXDistRuntimeDialect
-    IMEXDistRuntimeTransforms
-    IMEXUtil
-    IMEXTransforms
-	  MLIROptLib
-    MLIRExecutionEngine
-    MLIRIR
-    MLIRAffineDialect
-    MLIRAffineToStandard
-    MLIRAffineTransforms
-    MLIRFuncDialect
-    MLIRFuncToLLVM
-    MLIRFuncTransforms
-    MLIRLinalgDialect
-    MLIRLinalgTransforms
-    MLIRLLVMDialect
-    MLIRMathDialect
-    MLIRMathToFuncs
-    MLIRMathToLibm
-    MLIRMathToLLVM
-    MLIRMathTransforms
-    MLIRMemRefDialect
-    MLIRMemRefToLLVM
-    MLIRMemRefTransforms
-    MLIROpenMPDialect
-    MLIROpenMPToLLVM
-    MLIROpenMPToLLVMIRTranslation
-    MLIRReconcileUnrealizedCasts
-    MLIRSCFDialect
-    MLIRSCFToOpenMP
-    MLIRSCFToControlFlow
-    MLIRSCFTransforms
-    MLIRShapeDialect
-    MLIRShapeOpsTransforms
-    MLIRShapeToStandard
-    MLIRTosaDialect
-    MLIRTosaToLinalg
-    MLIRTosaToTensor
-    MLIRTensorTransforms
 )
-    # LLVM${LLVM_NATIVE_ARCH}CodeGen
-    # LLVM${LLVM_NATIVE_ARCH}Desc
-    # LLVMTarget
-    # MLIRAnalysis
-    # MLIRCallInterfaces
-    # MLIRCastInterfaces
-    # MLIRGPUToGPURuntimeTransforms
-    # MLIRGPUToSPIRV
-    # MLIRLLVMCommonConversion
-    # MLIRLLVMToLLVMIRTranslation
-    # MLIRLinalgTransforms
-    # MLIRMathToLibm
-    # MLIRMemRef
-    # MLIRParser
-    # MLIRPass
-    # MLIRReconcileUnrealizedCasts
-    # MLIRSCFToGPU
-    # MLIRSPIRVSerialization
-    # MLIRSPIRVTransforms
-    # MLIRSideEffectInterfaces
-    # MLIRTargetLLVMIRExport
-    # MLIRTransforms
@@ -120,8 +120,8 @@ def main():
     # there is certainly a more Pythonic way to initialize W,
     # but it will have no impact on performance.
     t0 = timer()
-    W = np.zeros(((2 * r + 1), (2 * r + 1)), dtype=np.float64)
-    B = np.zeros((n, n), dtype=np.float64)
+    W = np.zeros(((2 * r + 1), (2 * r + 1)), dtype=np.float32)
+    B = np.zeros((n, n), dtype=np.float32)
 
     if pattern == "star":
         stencil_size = 4 * r + 1
@@ -143,7 +143,7 @@ def main():
             W[r + j, r + j] = +1.0 / (4 * j * r)
             W[r - j, r - j] = -1.0 / (4 * j * r)
 
-    A = np.numpy.fromfunction(lambda i, j: i + j, (n, n), dtype=np.float64)
+    A = np.numpy.fromfunction(lambda i, j: i + j, (n, n), dtype=np.float32)
 
     for k in range(iterations + 1):
         # start timer after a warmup iteration
 
@@ -0,0 +1,5 @@
+{
+    global:        /* exported: every symbol whose name matches the glob below */
+        *DDPT*;
+    local: *;      /* all remaining symbols are made local, i.e. not exported */
+};
@@ -19,6 +19,8 @@
 #include <pybind11/stl.h>
 namespace py = pybind11;
 #endif
+
+namespace DDPT {
 """
 )
 
@@ -43,7 +45,7 @@
         print(f'        .value("{x.upper()}", {x.upper()})')
     print("        .export_values();\n")
 
-print("}\n#endif\n")
+print("}\n#endif\n} // namespace DDPT")
 
 # Close the file
 sys.stdout.close()
@@ -2,6 +2,8 @@
 
 #include "ddptensor/CollComm.hpp"
 
+namespace DDPT {
+
 void bufferize(DDPTensorImpl::ptr_type a_ptr, void *outPtr) {
   dispatch(a_ptr->dtype(), a_ptr->data(), [&a_ptr, outPtr](auto *ptr) {
     auto buff = static_cast<decltype(ptr)>(outPtr);
@@ -153,3 +155,4 @@ std::vector<std::vector<int>> CollComm::map(const PVSlice &n_slc,
 #endif // if 0
   return {};
 }
+} // namespace DDPT
@@ -8,6 +8,7 @@
 #include "ddptensor/Factory.hpp"
 #include "ddptensor/Transceiver.hpp"
 #include "ddptensor/TypeDispatch.hpp"
+#include "ddptensor/jit/mlir.hpp"
 
 #include <imex/Dialect/PTensor/IR/PTensorOps.h>
 #include <imex/Utils/PassUtils.h>
@@ -18,6 +19,8 @@
 #include <mlir/Dialect/Tensor/IR/Tensor.h>
 #include <mlir/IR/Builders.h>
 
+namespace DDPT {
+
 static const char *FORCE_DIST = getenv("DDPT_FORCE_DIST");
 
 inline uint64_t mkTeam(uint64_t team) {
@@ -36,8 +39,9 @@ struct DeferredFull : public Deferred {
       : Deferred(dtype, shape, team, true), _val(val) {}
 
   template <typename T> struct ValAndDType {
-    static ::mlir::Value op(::mlir::OpBuilder &builder, ::mlir::Location loc,
-                            const PyScalar &val, ::imex::ptensor::DType &dtyp) {
+    static ::mlir::Value op(::mlir::OpBuilder &builder,
+                            const ::mlir::Location &loc, const PyScalar &val,
+                            ::imex::ptensor::DType &dtyp) {
       dtyp = jit::PT_DTYPE<T>::value;
 
       if (is_none(val)) {
@@ -54,7 +58,7 @@ struct DeferredFull : public Deferred {
     };
   };
 
-  bool generate_mlir(::mlir::OpBuilder &builder, ::mlir::Location loc,
+  bool generate_mlir(::mlir::OpBuilder &builder, const ::mlir::Location &loc,
                      jit::DepManager &dm) override {
     ::mlir::SmallVector<::mlir::Value> shp(rank());
     for (auto i = 0; i < rank(); ++i) {
@@ -124,7 +128,7 @@ struct DeferredArange : public Deferred {
                  team, true),
         _start(start), _end(end), _step(step) {}
 
-  bool generate_mlir(::mlir::OpBuilder &builder, ::mlir::Location loc,
+  bool generate_mlir(::mlir::OpBuilder &builder, const ::mlir::Location &loc,
                      jit::DepManager &dm) override {
     // ::mlir::Value
     auto transceiver = getTransceiver();
@@ -192,7 +196,7 @@ struct DeferredLinspace : public Deferred {
       : Deferred(dtype, {static_cast<shape_type::value_type>(num)}, team, true),
         _start(start), _end(end), _num(num), _endpoint(endpoint) {}
 
-  bool generate_mlir(::mlir::OpBuilder &builder, ::mlir::Location loc,
+  bool generate_mlir(::mlir::OpBuilder &builder, const ::mlir::Location &loc,
                      jit::DepManager &dm) override {
     // ::mlir::Value
     auto teamV = team() == 0
@@ -247,14 +251,15 @@ ddptensor *Creator::linspace(double start, double end, uint64_t num,
 
 // ***************************************************************************
 
+extern DTypeId DEFAULT_FLOAT;
+extern DTypeId DEFAULT_INT;
+
 std::pair<ddptensor *, bool> Creator::mk_future(const py::object &b,
-                                                uint64_t team) {
+                                                uint64_t team, DTypeId dtype) {
   if (py::isinstance<ddptensor>(b)) {
     return {b.cast<ddptensor *>(), false};
-  } else if (py::isinstance<py::float_>(b)) {
-    return {Creator::full({}, b, FLOAT64, team), true};
-  } else if (py::isinstance<py::int_>(b)) {
-    return {Creator::full({}, b, INT64, team), true};
+  } else if (py::isinstance<py::float_>(b) || py::isinstance<py::int_>(b)) {
+    return {Creator::full({}, b, dtype, team), true};
   }
   throw std::runtime_error(
       "Invalid right operand to elementwise binary operation");
@@ -263,3 +268,4 @@ std::pair<ddptensor *, bool> Creator::mk_future(const py::object &b,
 FACTORY_INIT(DeferredFull, F_FULL);
 FACTORY_INIT(DeferredArange, F_ARANGE);
 FACTORY_INIT(DeferredLinspace, F_LINSPACE);
+} // namespace DDPT
@@ -10,6 +10,8 @@
 #include <algorithm>
 #include <iostream>
 
+namespace DDPT {
+
 DDPTensorImpl::DDPTensorImpl(
     Transceiver *transceiver, DTypeId dtype, shape_type gShape,
     void *l_allocated, void *l_aligned, intptr_t l_offset,
@@ -242,3 +244,4 @@ void DDPTensorImpl::replicate() {
   });
   set_owner(REPLICATED);
 }
+} // namespace DDPT
@@ -13,6 +13,7 @@
 #include "include/ddptensor/Service.hpp"
 #include "include/ddptensor/Transceiver.hpp"
 #include "include/ddptensor/itac.hpp"
+#include "include/ddptensor/jit/mlir.hpp"
 
 #include <imex/Dialect/PTensor/IR/PTensorOps.h>
 #include <mlir/Dialect/Func/IR/FuncOps.h>
@@ -24,11 +25,10 @@ namespace py = pybind11;
 
 #include <iostream>
 
-// thread-safe FIFO queue holding deferred objects
-static tbb::concurrent_bounded_queue<Runable::ptr_type> _deferred;
+namespace DDPT {
 
-// add a deferred object to the queue
-void push_runable(Runable::ptr_type &&r) { _deferred.push(std::move(r)); }
+// thread-safe FIFO queue holding deferred objects
+extern tbb::concurrent_bounded_queue<Runable::ptr_type> _deferred;
 
 // if needed, object/promise is broadcasted to worker processes
 // (for controller/worker mode)
@@ -180,3 +180,4 @@ void process_promises() {
     }
   } while (!done);
 }
+} // namespace DDPT
@@ -8,17 +8,21 @@
 #include "ddptensor/Broadcast.hpp"
 #include "ddptensor/Creator.hpp"
 #include "ddptensor/DDPTensorImpl.hpp"
+#include "ddptensor/Deferred.hpp"
 #include "ddptensor/Factory.hpp"
 #include "ddptensor/LinAlgOp.hpp"
 #include "ddptensor/Registry.hpp"
 #include "ddptensor/TypeDispatch.hpp"
 #include "ddptensor/TypePromotion.hpp"
+#include "ddptensor/jit/mlir.hpp"
 
 #include <imex/Dialect/Dist/IR/DistOps.h>
 #include <imex/Dialect/PTensor/IR/PTensorOps.h>
 #include <mlir/Dialect/Shape/IR/Shape.h>
 #include <mlir/IR/Builders.h>
 
+namespace DDPT {
+
 // convert id of our binop to id of imex::ptensor binop
 static ::imex::ptensor::EWBinOpId ddpt2mlir(const EWBinOpId bop) {
   switch (bop) {
@@ -91,7 +95,7 @@ struct DeferredEWBinOp : public Deferred {
       : Deferred(a.dtype(), broadcast(a.shape(), b.shape()), a.team(), true),
         _a(a.guid()), _b(b.guid()), _op(op) {}
 
-  bool generate_mlir(::mlir::OpBuilder &builder, ::mlir::Location loc,
+  bool generate_mlir(::mlir::OpBuilder &builder, const ::mlir::Location &loc,
                      jit::DepManager &dm) override {
     // FIXME the type of the result is based on a only
     auto av = dm.getDependent(builder, _a);
@@ -135,13 +139,20 @@ struct DeferredEWBinOp : public Deferred {
 
 ddptensor *EWBinOp::op(EWBinOpId op, const py::object &a, const py::object &b) {
   uint64_t teama = 0, teamb = 0;
-  if (py::isinstance<ddptensor>(a))
-    teama = a.cast<ddptensor *>()->get().team();
-  else if (py::isinstance<ddptensor>(b))
-    teamb = b.cast<ddptensor *>()->get().team();
-  auto team = teama ? teama : teamb;
-  auto bb = Creator::mk_future(b, team);
-  auto aa = Creator::mk_future(a, team);
+  DTypeId dtypea = DTYPE_LAST, dtypeb = DTYPE_LAST;
+
+  if (py::isinstance<ddptensor>(a)) {
+    auto tmp = a.cast<ddptensor *>()->get();
+    teama = tmp.team();
+    dtypea = tmp.dtype();
+  }
+  if (py::isinstance<ddptensor>(b)) {
+    auto tmp = b.cast<ddptensor *>()->get();
+    teamb = tmp.team();
+    dtypeb = tmp.dtype();
+  }
+  auto aa = Creator::mk_future(a, teamb, dtypeb);
+  auto bb = Creator::mk_future(b, teama, dtypea);
   if (bb.first->get().team() != aa.first->get().team()) {
     throw std::runtime_error(
         "teams of operands do not match in binary operation");
@@ -159,3 +170,4 @@ ddptensor *EWBinOp::op(EWBinOpId op, const py::object &a, const py::object &b) {
 }
 
 FACTORY_INIT(DeferredEWBinOp, F_EWBINOP);
+} // namespace DDPT
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +{
 +	global:
 +        *DDPT*;
 +	local: *;
 +};