
Commit 0ec7428

remove dtensortype (#66)

* updating to new imex without DistTensorType
* adding device support
* temporarily disable imex patches to llvm

1 parent 8f36592, commit 0ec7428

21 files changed: +255 -272 lines

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ jobs:
 git remote add origin https://github.com/llvm/llvm-project || exit 1
 git fetch origin ${{ env.LLVM_SHA }} || exit 1
 git reset --hard FETCH_HEAD || exit 1
-if [ -d "$GITHUB_WORKSPACE/third_party/imex/build_tools/patches" ]; then git apply $GITHUB_WORKSPACE/third_party/imex/build_tools/patches/*.patch; fi
+# FIXME if [ -d "$GITHUB_WORKSPACE/third_party/imex/build_tools/patches" ]; then git apply $GITHUB_WORKSPACE/third_party/imex/build_tools/patches/*.patch; fi
 cd -
 mkdir -p build/llvm-mlir || exit 1
 cd build/llvm-mlir || exit 1

ddptensor/__init__.py

Lines changed: 6 additions & 6 deletions

@@ -69,27 +69,27 @@ def to_numpy(a):
     FUNC = func.upper()
     if func == "full":
         exec(
-            f"{func} = lambda shape, val, dtype, team=1: dtensor(_cdt.Creator.full(shape, val, dtype, team))"
+            f"{func} = lambda shape, val, dtype=float64, device='', team=1: dtensor(_cdt.Creator.full(shape, val, dtype, device, team))"
         )
     elif func == "empty":
         exec(
-            f"{func} = lambda shape, dtype, team=1: dtensor(_cdt.Creator.full(shape, None, dtype, team))"
+            f"{func} = lambda shape, dtype=float64, device='', team=1: dtensor(_cdt.Creator.full(shape, None, dtype, device, team))"
         )
     elif func == "ones":
         exec(
-            f"{func} = lambda shape, dtype, team=1: dtensor(_cdt.Creator.full(shape, 1, dtype, team))"
+            f"{func} = lambda shape, dtype=float64, device='', team=1: dtensor(_cdt.Creator.full(shape, 1, dtype, device, team))"
         )
     elif func == "zeros":
        exec(
-            f"{func} = lambda shape, dtype, team=1: dtensor(_cdt.Creator.full(shape, 0, dtype, team))"
+            f"{func} = lambda shape, dtype=float64, device='', team=1: dtensor(_cdt.Creator.full(shape, 0, dtype, device, team))"
         )
     elif func == "arange":
         exec(
-            f"{func} = lambda start, end, step, dtype, team=1: dtensor(_cdt.Creator.arange(start, end, step, dtype, team))"
+            f"{func} = lambda start, end, step, dtype=int64, device='', team=1: dtensor(_cdt.Creator.arange(start, end, step, dtype, device, team))"
         )
     elif func == "linspace":
         exec(
-            f"{func} = lambda start, end, step, endpoint, dtype, team=1: dtensor(_cdt.Creator.linspace(start, end, step, endpoint, dtype, team))"
+            f"{func} = lambda start, end, step, endpoint, dtype=float64, device='', team=1: dtensor(_cdt.Creator.linspace(start, end, step, endpoint, dtype, device, team))"
         )
 
 for func in api.api_categories["ReduceOp"]:
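
With device support added, the generated creator wrappers now default their dtype and accept a device string. A minimal usage sketch of the updated Python API follows; it assumes the package imports as ddptensor, and the device/team values are illustrative placeholders:

    import ddptensor as dt

    # dtype now defaults (float64 for most creators, int64 for arange) and every
    # creator accepts a device string ("" selects the default device) and a team.
    a = dt.ones((4, 4))                      # float64, default device
    b = dt.zeros((4, 4), dtype=dt.float32)   # explicit dtype
    c = dt.arange(0, 10, 2, device="")       # int64 by default
    d = dt.linspace(0.0, 1.0, 11, True)      # endpoint=True, float64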

ddptensor/numpy/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 from .. import empty, float32
 
 
-def fromfunction(function, shape, *, dtype=float32, team=1):
-    t = empty(shape, dtype, team)
+def fromfunction(function, shape, *, dtype=float32, device="", team=1):
+    t = empty(shape, dtype=dtype, device=device, team=team)
     t._t.map(function)
     return t
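
For illustration, a short sketch of calling the updated fromfunction, which now forwards dtype, device and team to empty() as keywords (import path and argument values are assumptions, mirroring numpy.fromfunction semantics):

    import ddptensor as dt
    from ddptensor.numpy import fromfunction

    # the callable is mapped over the new tensor via t._t.map(function)
    t = fromfunction(lambda i, j: i + j, (3, 3), dtype=dt.float32, device="", team=1)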

imex_version.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-06523703fa96f9731b9c3c1c7f6fee0914ef9f26
+dd921a5893d2956ddee3d7fecb84612edf15fbbe

src/Creator.cpp

Lines changed: 50 additions & 65 deletions

@@ -10,6 +10,7 @@
 #include "ddptensor/TypeDispatch.hpp"
 #include "ddptensor/jit/mlir.hpp"
 
+#include <imex/Dialect/Dist/IR/DistOps.h>
 #include <imex/Dialect/PTensor/IR/PTensorOps.h>
 #include <imex/Utils/PassUtils.h>
 
@@ -35,8 +36,8 @@ struct DeferredFull : public Deferred {
 
   DeferredFull() = default;
   DeferredFull(const shape_type &shape, PyScalar val, DTypeId dtype,
-               uint64_t team)
-      : Deferred(dtype, shape, team, true), _val(val) {}
+               const std::string &device, uint64_t team)
+      : Deferred(dtype, shape, device, team), _val(val) {}
 
   template <typename T> struct ValAndDType {
     static ::mlir::Value op(::mlir::OpBuilder &builder,
@@ -67,35 +68,27 @@ struct DeferredFull : public Deferred {
 
     ::imex::ptensor::DType dtyp;
     ::mlir::Value val = dispatch<ValAndDType>(_dtype, builder, loc, _val, dtyp);
-
-    auto transceiver = getTransceiver();
-    auto teamV = team() == 0
-                     ? ::mlir::Value()
-                     : ::imex::createIndex(loc, builder,
-                                           reinterpret_cast<uint64_t>(team()));
-
-    auto rTyp = ::imex::ptensor::PTensorType::get(
-        shape(), imex::ptensor::toMLIR(builder, dtyp));
-
-    dm.addVal(this->guid(),
-              builder.create<::imex::ptensor::CreateOp>(loc, rTyp, shp, dtyp,
-                                                         val, nullptr, teamV),
-              [this](uint64_t rank, void *l_allocated, void *l_aligned,
-                     intptr_t l_offset, const intptr_t *l_sizes,
-                     const intptr_t *l_strides, void *o_allocated,
-                     void *o_aligned, intptr_t o_offset,
-                     const intptr_t *o_sizes, const intptr_t *o_strides,
-                     void *r_allocated, void *r_aligned, intptr_t r_offset,
-                     const intptr_t *r_sizes, const intptr_t *r_strides,
-                     uint64_t *lo_allocated, uint64_t *lo_aligned) {
-                assert(rank == this->rank());
-                this->set_value(std::move(mk_tnsr(
-                    reinterpret_cast<Transceiver *>(this->team()), _dtype,
-                    this->shape(), l_allocated, l_aligned, l_offset, l_sizes,
-                    l_strides, o_allocated, o_aligned, o_offset, o_sizes,
-                    o_strides, r_allocated, r_aligned, r_offset, r_sizes,
-                    r_strides, lo_allocated, lo_aligned)));
-              });
+    auto envs = jit::mkEnvs(builder, rank(), _device, team());
+
+    dm.addVal(
+        this->guid(),
+        builder.create<::imex::ptensor::CreateOp>(loc, shp, dtyp, val, envs),
+        [this](uint64_t rank, void *l_allocated, void *l_aligned,
+               intptr_t l_offset, const intptr_t *l_sizes,
+               const intptr_t *l_strides, void *o_allocated, void *o_aligned,
+               intptr_t o_offset, const intptr_t *o_sizes,
+               const intptr_t *o_strides, void *r_allocated, void *r_aligned,
+               intptr_t r_offset, const intptr_t *r_sizes,
+               const intptr_t *r_strides, uint64_t *lo_allocated,
+               uint64_t *lo_aligned) {
+          assert(rank == this->rank());
+          this->set_value(std::move(
+              mk_tnsr(reinterpret_cast<Transceiver *>(this->team()), _dtype,
+                      this->shape(), l_allocated, l_aligned, l_offset, l_sizes,
+                      l_strides, o_allocated, o_aligned, o_offset, o_sizes,
+                      o_strides, r_allocated, r_aligned, r_offset, r_sizes,
+                      r_strides, lo_allocated, lo_aligned)));
+        });
     return false;
   }
 
@@ -109,9 +102,11 @@ struct DeferredFull : public Deferred {
 };
 
 ddptensor *Creator::full(const shape_type &shape, const py::object &val,
-                         DTypeId dtype, uint64_t team) {
+                         DTypeId dtype, const std::string &device,
+                         uint64_t team) {
   auto v = mk_scalar(val, dtype);
-  return new ddptensor(defer<DeferredFull>(shape, v, dtype, mkTeam(team)));
+  return new ddptensor(
+      defer<DeferredFull>(shape, v, dtype, device, mkTeam(team)));
 }
 
 // ***************************************************************************
@@ -121,33 +116,25 @@ struct DeferredArange : public Deferred {
 
   DeferredArange() = default;
   DeferredArange(uint64_t start, uint64_t end, uint64_t step, DTypeId dtype,
-                 uint64_t team)
+                 const std::string &device, uint64_t team)
       : Deferred(dtype,
                  {static_cast<shape_type::value_type>(
                      (end - start + step + (step < 0 ? 1 : -1)) / step)},
-                 team, true),
+                 device, team),
         _start(start), _end(end), _step(step) {}
 
   bool generate_mlir(::mlir::OpBuilder &builder, const ::mlir::Location &loc,
                      jit::DepManager &dm) override {
-    // ::mlir::Value
-    auto transceiver = getTransceiver();
-    auto teamV = team() == 0
-                     ? ::mlir::Value()
-                     : ::imex::createIndex(loc, builder,
-                                           reinterpret_cast<uint64_t>(team()));
-
     auto _num = shape()[0];
-
     auto start = ::imex::createFloat(loc, builder, _start);
     auto stop = ::imex::createFloat(loc, builder, _start + _num * _step);
     auto num = ::imex::createIndex(loc, builder, _num);
-    auto rTyp = ::imex::ptensor::PTensorType::get(
-        shape(), imex::ptensor::toMLIR(builder, jit::getPTDType(_dtype)));
+    auto dtyp = jit::getPTDType(dtype());
+    auto envs = jit::mkEnvs(builder, rank(), _device, team());
 
     dm.addVal(this->guid(),
-              builder.create<::imex::ptensor::LinSpaceOp>(
-                  loc, rTyp, start, stop, num, false, nullptr, teamV),
+              builder.create<::imex::ptensor::LinSpaceOp>(loc, start, stop, num,
+                                                          false, dtyp, envs),
               [this](uint64_t rank, void *l_allocated, void *l_aligned,
                      intptr_t l_offset, const intptr_t *l_sizes,
                      const intptr_t *l_strides, void *o_allocated,
@@ -157,7 +144,7 @@ struct DeferredArange : public Deferred {
                      const intptr_t *r_sizes, const intptr_t *r_strides,
                      uint64_t *lo_allocated, uint64_t *lo_aligned) {
                 assert(rank == 1);
-                assert(l_strides[0] == 1);
+                assert(o_strides[0] == 1);
                 this->set_value(std::move(mk_tnsr(
                     reinterpret_cast<Transceiver *>(this->team()), _dtype,
                     this->shape(), l_allocated, l_aligned, l_offset, l_sizes,
@@ -178,9 +165,10 @@ struct DeferredArange : public Deferred {
 };
 
 ddptensor *Creator::arange(uint64_t start, uint64_t end, uint64_t step,
-                           DTypeId dtype, uint64_t team) {
+                           DTypeId dtype, const std::string &device,
+                           uint64_t team) {
   return new ddptensor(
-      defer<DeferredArange>(start, end, step, dtype, mkTeam(team)));
+      defer<DeferredArange>(start, end, step, dtype, device, mkTeam(team)));
 }
 
 // ***************************************************************************
@@ -192,27 +180,22 @@ struct DeferredLinspace : public Deferred {
 
   DeferredLinspace() = default;
   DeferredLinspace(double start, double end, uint64_t num, bool endpoint,
-                   DTypeId dtype, uint64_t team)
-      : Deferred(dtype, {static_cast<shape_type::value_type>(num)}, team, true),
+                   DTypeId dtype, const std::string &device, uint64_t team)
+      : Deferred(dtype, {static_cast<shape_type::value_type>(num)}, device,
+                 team),
         _start(start), _end(end), _num(num), _endpoint(endpoint) {}
 
   bool generate_mlir(::mlir::OpBuilder &builder, const ::mlir::Location &loc,
                      jit::DepManager &dm) override {
-    // ::mlir::Value
-    auto teamV = team() == 0
-                     ? ::mlir::Value()
-                     : ::imex::createIndex(loc, builder,
-                                           reinterpret_cast<uint64_t>(team()));
-
     auto start = ::imex::createFloat(loc, builder, _start);
     auto stop = ::imex::createFloat(loc, builder, _end);
     auto num = ::imex::createIndex(loc, builder, _num);
-    auto rTyp = ::imex::ptensor::PTensorType::get(
-        shape(), imex::ptensor::toMLIR(builder, jit::getPTDType(_dtype)));
+    auto dtyp = jit::getPTDType(dtype());
+    auto envs = jit::mkEnvs(builder, rank(), _device, team());
 
     dm.addVal(this->guid(),
               builder.create<::imex::ptensor::LinSpaceOp>(
-                  loc, rTyp, start, stop, num, _endpoint, nullptr, teamV),
+                  loc, start, stop, num, _endpoint, dtyp, envs),
               [this](uint64_t rank, void *l_allocated, void *l_aligned,
                      intptr_t l_offset, const intptr_t *l_sizes,
                      const intptr_t *l_strides, void *o_allocated,
@@ -244,9 +227,10 @@ struct DeferredLinspace : public Deferred {
 };
 
 ddptensor *Creator::linspace(double start, double end, uint64_t num,
-                             bool endpoint, DTypeId dtype, uint64_t team) {
-  return new ddptensor(
-      defer<DeferredLinspace>(start, end, num, endpoint, dtype, mkTeam(team)));
+                             bool endpoint, DTypeId dtype,
+                             const std::string &device, uint64_t team) {
+  return new ddptensor(defer<DeferredLinspace>(start, end, num, endpoint, dtype,
+                                               device, mkTeam(team)));
 }
 
 // ***************************************************************************
@@ -255,11 +239,12 @@ extern DTypeId DEFAULT_FLOAT;
 extern DTypeId DEFAULT_INT;
 
 std::pair<ddptensor *, bool> Creator::mk_future(const py::object &b,
+                                                const std::string &device,
                                                 uint64_t team, DTypeId dtype) {
   if (py::isinstance<ddptensor>(b)) {
     return {b.cast<ddptensor *>(), false};
   } else if (py::isinstance<py::float_>(b) || py::isinstance<py::int_>(b)) {
-    return {Creator::full({}, b, dtype, team), true};
+    return {Creator::full({}, b, dtype, device, team), true};
   }
   throw std::runtime_error(
       "Invalid right operand to elementwise binary operation");

src/DDPTensorImpl.cpp

Lines changed: 20 additions & 27 deletions

@@ -22,12 +22,12 @@ DDPTensorImpl::DDPTensorImpl(
     uint64_t *lo_allocated, uint64_t *lo_aligned, rank_type owner)
     : _owner(owner), _transceiver(transceiver), _gShape(gShape),
       _lo_allocated(lo_allocated), _lo_aligned(lo_aligned),
-      _lhsHalo(gShape.size(), l_allocated, l_aligned, l_offset, l_sizes,
-               l_strides),
-      _lData(gShape.size(), o_allocated, o_aligned, o_offset, o_sizes,
-             o_strides),
-      _rhsHalo(gShape.size(), r_allocated, r_aligned, r_offset, r_sizes,
-               r_strides),
+      _lhsHalo(l_allocated ? gShape.size() : 0, l_allocated, l_aligned,
+               l_offset, l_sizes, l_strides),
+      _lData(o_allocated ? gShape.size() : 0, o_allocated, o_aligned, o_offset,
+             o_sizes, o_strides),
+      _rhsHalo(r_allocated ? gShape.size() : 0, r_allocated, r_aligned,
+               r_offset, r_sizes, r_strides),
       _dtype(dtype) {
   if (ndims() == 0) {
     _owner = REPLICATED;
@@ -183,7 +183,7 @@ int64_t DDPTensorImpl::__int__() const {
 void DDPTensorImpl::add_to_args(std::vector<void *> &args) {
   int ndims = this->ndims();
   auto storeMR = [ndims](DynMemRef &mr) -> intptr_t * {
-    intptr_t *buff = new intptr_t[dtensor_sz(ndims)];
+    intptr_t *buff = new intptr_t[memref_sz(ndims)];
     buff[0] = reinterpret_cast<intptr_t>(mr._allocated);
     buff[1] = reinterpret_cast<intptr_t>(mr._aligned);
     buff[2] = static_cast<intptr_t>(mr._offset);
@@ -192,29 +192,22 @@ void DDPTensorImpl::add_to_args(std::vector<void *> &args) {
     return buff;
   }; // FIXME memory leak?
 
-  if (_transceiver == nullptr) {
+  if (_transceiver == nullptr || ndims == 0) {
     // no-dist-mode
     args.push_back(storeMR(_lData));
   } else {
-    // transceiver/team first
-    // args.push_back(_transceiver);
-    // local tensor first
-    if (ndims > 0) {
-      args.push_back(storeMR(_lhsHalo));
-      args.push_back(storeMR(_lData));
-      args.push_back(storeMR(_rhsHalo));
-      assert(5 == memref_sz(1));
-      // local offsets last
-      auto buff = new intptr_t[dtensor_sz(1)];
-      buff[0] = reinterpret_cast<intptr_t>(_lo_allocated);
-      buff[1] = reinterpret_cast<intptr_t>(_lo_aligned);
-      buff[2] = 0;
-      buff[3] = ndims;
-      buff[4] = 1;
-      args.push_back(buff);
-    } else {
-      args.push_back(storeMR(_lData));
-    }
+    args.push_back(storeMR(_lhsHalo));
+    args.push_back(storeMR(_lData));
+    args.push_back(storeMR(_rhsHalo));
+    // local offsets last
+    auto buff = new intptr_t[memref_sz(1)];
+    assert(5 == memref_sz(1));
+    buff[0] = reinterpret_cast<intptr_t>(_lo_allocated);
+    buff[1] = reinterpret_cast<intptr_t>(_lo_aligned);
+    buff[2] = 0;
+    buff[3] = ndims;
+    buff[4] = 1;
+    args.push_back(buff);
   }
 }

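add_to_args packs each memref into a flat intptr_t buffer, and the assert(5 == memref_sz(1)) above pins the rank-1 descriptor to five slots. A minimal sketch of that layout, assuming memref_sz follows the usual MLIR descriptor size (the helper here is illustrative, not the project's definition):

    def memref_sz(ndims):
        # allocated ptr, aligned ptr, offset, then sizes[ndims] and strides[ndims],
        # matching buff[0..4] filled for the rank-1 local-offsets buffer above
        return 3 + 2 * ndims

    assert memref_sz(1) == 5
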
src/Deferred.cpp

Lines changed: 2 additions & 2 deletions

@@ -43,8 +43,8 @@ Deferred::future_type Deferred::get_future() {
                            _guid,
                            _dtype,
                            _shape,
-                           _team,
-                           _balanced};
+                           _device,
+                           _team};
 }
 
 // defer a tensor-producing computation by adding it to the queue.
