
Commit 0571cfd

zou3519 authored and facebook-github-bot committed
Implement MultiBatchVmapTransform::logicalToPhysical(TensorList) (pytorch#41942)
Summary:
Pull Request resolved: pytorch#41942

This function:
- permutes all batch dims to the front of the tensors
- aligns all the batch dims to the collective levels of all the tensors
- expands all of the batch dims such that they are present in each of the result tensors

This function is useful for the next diff up on the stack (which is implementing a fallback kernel for BatchedTensor). It's also useful in general for implementing batching rules on operators that take in multiple batch dimensions at the front of each tensor (but we don't have too many of those in PyTorch).

Test Plan:
- `./build/bin/vmap_test`

Reviewed By: ezyang

Differential Revision: D22764104

Pulled By: zou3519

fbshipit-source-id: d42cc8824a1bcf258687de164b7853af52852f53
1 parent 1994ab1 commit 0571cfd
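For orientation, a short sketch of the new overload in action, adapted from the first case of TestMultiBatchVmapTransformBatchedBatched added in this commit (makeBatched is the test-only helper from vmap_test.cpp, not a public API):

    // Two logical tensors, batched over vmap levels 0 and 1 on different dims.
    int64_t B0 = 5, B1 = 7;
    Tensor x = at::randn({2, B0, 3, B1});
    Tensor y = at::randn({B1, 2, 3, B0});
    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/1}, {/*lvl*/1, /*dim*/3}});
    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/3}, {/*lvl*/1, /*dim*/0}});

    // The TensorList overload permutes every batch dim to the front and
    // aligns them to the collective levels {0, 1} of both inputs.
    auto views = MultiBatchVmapTransform::logicalToPhysical({batched_x, batched_y});
    // views[0].tensor() and views[1].tensor() both have sizes {B0, B1, 2, 3},
    // i.e. at::movedim(x, {1, 3}, {0, 1}) and at::movedim(y, {0, 3}, {1, 0}).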

File tree

3 files changed: +271 -6 lines changed
  aten/src/ATen/VmapTransforms.cpp
  aten/src/ATen/VmapTransforms.h
  aten/src/ATen/test/vmap_test.cpp

aten/src/ATen/VmapTransforms.cpp

Lines changed: 57 additions & 5 deletions

@@ -55,11 +55,6 @@ VmapPhysicalView MultiBatchVmapTransform::logicalToPhysical(const Tensor& logical_tensor) {
   return { permuteBatchDimsToFront(batched), createLevelsBitset(batched->bdims()) };
 }
 
-std::vector<VmapPhysicalView>
-MultiBatchVmapTransform::logicalToPhysical(TensorList logical_tensors) {
-  TORCH_INTERNAL_ASSERT(false, "NYI");
-}
-
 int64_t VmapPhysicalView::numBatchDims() const {
   return levels_.count();
 }
@@ -186,6 +181,63 @@ static Tensor alignBatchDimsAtFront(
   return physical_tensor.view(aligned_sizes);
 }
 
+// The algorithm is as follows:
+// 1. Figure out what all of the collective levels in `logical_tensors` is.
+// 2. Move all batch dims to the front of the tensors and add extra dims
+//    of size 1. At this point, every tensor will have a dimension for
+//    each of the collective levels.
+// 3. Compute the batch_sizes.
+// 4. Expand each physical tensor so that they have output batch size equal
+//    to `batch_sizes`
+VmapPhysicalViewVec
+MultiBatchVmapTransform::logicalToPhysical(TensorList logical_tensors) {
+  // Figure out all of the collective vmap levels in `logical_tensors`.
+  std::bitset<kVmapNumLevels> collective_levels;
+  for (const auto& logical_tensor : logical_tensors) {
+    auto* batched = maybeGetBatched(logical_tensor);
+    if (batched) {
+      collective_levels |= createLevelsBitset(batched->bdims());
+    }
+  }
+
+  // Populate physical_tensors.
+  // This contains a list of regular (non-Batched) Tensors where all of the
+  // batch dims have been moved to the front of the tensor. Any previously
+  // non-existing batch dims get added to the tensors as new dimensions of size 1.
+  std::vector<Tensor> physical_tensors;
+  int64_t num_batch_dims = collective_levels.count();
+  for (const auto& logical_tensor : logical_tensors) {
+    auto requested_example_dim = /*logical_dim*/logical_tensor.dim();
+    auto physical_tensor = alignBatchDimsAtFront(
+        logical_tensor, collective_levels, requested_example_dim);
+    physical_tensors.push_back(std::move(physical_tensor));
+  }
+
+  // Compute batch_sizes
+  VmapDimVector batch_sizes(num_batch_dims, 1);
+  for (const auto& physical_tensor : physical_tensors) {
+    auto physical_sizes = physical_tensor.sizes();
+    for (int64_t dim = 0; dim < num_batch_dims; dim++) {
+      if (physical_sizes[dim] != 1) {
+        batch_sizes[dim] = physical_sizes[dim];
+      }
+    }
+  }
+
+  // Expand each physical_tensor so that it has batch sizes `batch_sizes`
+  VmapPhysicalViewVec result;
+  for (const auto& physical_tensor : physical_tensors) {
+    VmapDimVector expanded_size(batch_sizes.begin(), batch_sizes.end());
+    auto physical_sizes = physical_tensor.sizes();
+    expanded_size.insert(
+        expanded_size.end(),
+        physical_sizes.begin() + num_batch_dims,
+        physical_sizes.end());
+    result.emplace_back(physical_tensor.expand(expanded_size), collective_levels);
+  }
+  return result;
+}
+
 static std::pair<std::bitset<kVmapNumLevels>,int64_t>
 getLevelsAndLargestLogicalDim(TensorList logical_tensors) {
   TORCH_INTERNAL_ASSERT(logical_tensors.size() > 0);
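To make the four steps above concrete, here is an illustrative walkthrough (not part of the diff), using the shapes from the "broadcasted batch dims" test case added below:

    // Inputs:
    //   x: sizes {B0, B2, 2, 3}, batched at levels {0, 2}
    //   y: sizes {B0, B1, 2, 3}, batched at levels {0, 1}
    //
    // Step 1: collective_levels = {0, 1, 2}, so num_batch_dims = 3.
    // Step 2: alignBatchDimsAtFront inserts size-1 dims for missing levels:
    //   x_physical: {B0, 1,  B2, 2, 3}   // level 1 absent in x
    //   y_physical: {B0, B1, 1,  2, 3}   // level 2 absent in y
    // Step 3: batch_sizes takes the non-1 size per leading dim: {B0, B1, B2}.
    // Step 4: each physical tensor is expanded to batch_sizes + example sizes:
    //   x_physical.expand({B0, B1, B2, 2, 3})
    //   y_physical.expand({B0, B1, B2, 2, 3})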

aten/src/ATen/VmapTransforms.h

Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ using VmapDimVector = SmallVector<int64_t, kVmapStaticDimVecSize>;
 // and returns a VmapPhysicalView on the tensor(s).
 struct TORCH_API MultiBatchVmapTransform {
   static VmapPhysicalView logicalToPhysical(const Tensor& logical_tensor);
-  static std::vector<VmapPhysicalView> logicalToPhysical(TensorList logical_tensors);
+  static VmapPhysicalViewVec logicalToPhysical(TensorList logical_tensors);
 };
 
 // VmapTransform for operators that broadcast all inputs.
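The new return type VmapPhysicalViewVec is not defined in this hunk. Presumably it is a SmallVector alias analogous to VmapDimVector above, which avoids a heap allocation when only a few inputs are transformed; the sketch below is an assumption, not part of this commit:

    // Assumed shape of the alias, for illustration only; the real definition
    // lives elsewhere in VmapTransforms.h and the element count is hypothetical.
    using VmapPhysicalViewVec = SmallVector<VmapPhysicalView, /*e.g.*/ 4>;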

aten/src/ATen/test/vmap_test.cpp

Lines changed: 213 additions & 0 deletions

@@ -850,4 +850,217 @@ TEST(VmapTest, TestBatchedTensorPermute) {
   }
 }
 
+static void checkMultiBatchVmapTransform(TensorList inputs, TensorList expected_outputs) {
+  auto outputs = MultiBatchVmapTransform::logicalToPhysical(inputs);
+  ASSERT_EQ(outputs.size(), expected_outputs.size());
+  for (int64_t idx = 0; idx < outputs.size(); idx++) {
+    const auto& output = outputs[idx].tensor();
+    ASSERT_EQ(output.data_ptr(), expected_outputs[idx].data_ptr());
+    ASSERT_EQ(output.sizes(), expected_outputs[idx].sizes());
+    ASSERT_TRUE(at::allclose(output, expected_outputs[idx]));
+  }
+}
+
+TEST(VmapTest, TestMultiBatchVmapTransformBatchedBatched) {
+  {
+    // Check that batch dims get moved to the front
+    int64_t B0 = 5, B1 = 7;
+    Tensor x = at::randn({2, B0, 3, B1});
+    Tensor y = at::randn({B1, 2, 3, B0});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/1}, {/*lvl*/1, /*dim*/3}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/3}, {/*lvl*/1, /*dim*/0}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, batched_y},
+        {at::movedim(x, {1, 3}, {0, 1}), at::movedim(y, {0, 3}, {1, 0})});
+  }
+  {
+    // Check that batch dims become broadcasted and are present in all returns
+    int64_t B0 = 5, B1 = 7, B2 = 9;
+    Tensor x = at::randn({B0, B2, 2, 3});
+    Tensor y = at::randn({B0, B1, 2, 3});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}, {/*lvl*/2, /*dim*/1}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/0}, {/*lvl*/1, /*dim*/1}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, batched_y},
+        {x.unsqueeze(1).expand({B0, B1, B2, 2, 3}), y.unsqueeze(2).expand({B0, B1, B2, 2, 3})});
+  }
+  {
+    // Check operation on tensors of different logical dims
+    int64_t B0 = 5;
+    Tensor x = at::randn({B0, 3});
+    Tensor y = at::randn({B0, 2, 3});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/0}});
+
+    checkMultiBatchVmapTransform({batched_x, batched_y}, {x, y});
+  }
+  {
+    // More complicated example with two tensors.
+    int64_t B0 = 5, B1 = 7, B2 = 11, B3 = 13;
+    Tensor x = at::randn({2, B0, 3, B2});
+    Tensor y = at::randn({B3, 3, B1});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/1}, {/*lvl*/2, /*dim*/3}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/1, /*dim*/2}, {/*lvl*/3, /*dim*/0}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, batched_y},
+        {
+          x.permute({1, 3, 0, 2}).view({B0, 1, B2, 1, 2, 3}).expand({B0, B1, B2, B3, 2, 3}),
+          y.permute({2, 0, 1}).view({1, B1, 1, B3, 3}).expand({B0, B1, B2, B3, 3}),
+        });
+  }
+  {
+    // Edge case: BatchedTensor "scalar" handling
+    int64_t B0 = 5, B2 = 11;
+    Tensor x = at::randn({B0});
+    Tensor y = at::randn({B0, B2});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/0}, {/*lvl*/1, /*dim*/1}});
+
+    checkMultiBatchVmapTransform({batched_x, batched_y}, {x.view({B0, 1}).expand({B0, B2}), y});
+    checkMultiBatchVmapTransform({batched_y, batched_x}, {y, x.view({B0, 1}).expand({B0, B2})});
+  }
+  {
+    // Edge case: Only one tensor is a "batchedtensor scalar"
+    int64_t B0 = 5, B2 = 11;
+    Tensor x = at::randn({B0});
+    Tensor y = at::randn({B0, B2, 2});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/0}, {/*lvl*/1, /*dim*/1}});
+
+    checkMultiBatchVmapTransform({batched_x, batched_y}, {x.view({B0, 1}).expand({B0, B2}), y});
+    checkMultiBatchVmapTransform({batched_y, batched_x}, {y, x.view({B0, 1}).expand({B0, B2})});
+  }
+}
+
+TEST(VmapTest, TestMultiBatchVmapTransformBatchedUnbatched) {
+  {
+    // Check same example size
+    int64_t B0 = 5, B1 = 7;
+    Tensor x = at::randn({2, B0, 3, B1});
+    Tensor y = at::randn({2, 3});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/1}, {/*lvl*/1, /*dim*/3}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, y},
+        {at::movedim(x, {1, 3}, {0, 1}), y.view({1, 1, 2, 3}).expand({B0, B1, 2, 3})});
+    checkMultiBatchVmapTransform(
+        {y, batched_x},
+        {y.view({1, 1, 2, 3}).expand({B0, B1, 2, 3}), at::movedim(x, {1, 3}, {0, 1})});
+  }
+  {
+    // BatchedTensor has higher example dim than non-batched-tensor
+    int64_t B0 = 5, B1 = 7;
+    Tensor x = at::randn({B0, B1, 2, 3});
+    Tensor y = at::randn({3});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}, {/*lvl*/1, /*dim*/1}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, y}, {x, y.view({1, 1, 3}).expand({B0, B1, 3})});
+    checkMultiBatchVmapTransform(
+        {y, batched_x}, {y.view({1, 1, 3}).expand({B0, B1, 3}), x});
+  }
+  {
+    // BatchedTensor has lower example dim than non-batched-tensor
+    int64_t B0 = 5, B1 = 7;
+    Tensor x = at::randn({B0, B1, 3});
+    Tensor y = at::randn({2, 3});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}, {/*lvl*/1, /*dim*/1}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, y}, {x.view({B0, B1, 3}), y.view({1, 1, 2, 3}).expand({B0, B1, 2, 3})});
+    checkMultiBatchVmapTransform(
+        {y, batched_x}, {y.view({1, 1, 2, 3}).expand({B0, B1, 2, 3}), x.view({B0, B1, 3})});
+  }
+  {
+    // Scalar handling
+    int64_t B0 = 5, B1 = 7;
+    Tensor x = at::randn({B0, B1});
+    Tensor y = at::randn({});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/0}, {/*lvl*/1, /*dim*/1}});
+
+    checkMultiBatchVmapTransform({batched_x, y}, {x, y.view({1, 1}).expand({B0, B1})});
+    checkMultiBatchVmapTransform({y, batched_x}, {y.view({1, 1}).expand({B0, B1}), x});
+  }
+}
+
+TEST(VmapTest, TestMultiBatchVmapTransformMaxLevels) {
+  {
+    // inputs have all 64 levels
+    auto x = randn(std::vector<int64_t>(kVmapNumLevels, 1));
+    auto y = randn(std::vector<int64_t>(kVmapNumLevels, 1));
+    auto batched_x = makeBatched(x, maxBatchDimsAtFront());
+    auto batched_y = makeBatched(y, maxBatchDimsAtFront());
+
+    checkMultiBatchVmapTransform({batched_x, batched_y}, {x, y});
+  }
+  {
+    // inputs don't have all 64 levels, but results do.
+    int64_t split = 19;
+    auto x = randn(std::vector<int64_t>(split, 1));
+    auto y = randn(std::vector<int64_t>(kVmapNumLevels - split, 1));
+
+    auto tmp = maxBatchDimsAtFront();
+    BatchDims x_bdims(tmp.begin(), tmp.begin() + split);
+
+    // Construct y_bdims.
+    int64_t dim = 0;
+    auto y_bdims_vector = fmap(
+        ArrayRef<BatchDim>(tmp.begin() + split, tmp.end()),
+        [&](const BatchDim& bdim) -> BatchDim {
+          return { bdim.level(), dim++ };
+        });
+    BatchDims y_bdims(y_bdims_vector.begin(), y_bdims_vector.end());
+
+    auto batched_x = makeBatched(x, x_bdims);
+    auto batched_y = makeBatched(y, y_bdims);
+
+    auto expected_size = std::vector<int64_t>(kVmapNumLevels, 1);
+    checkMultiBatchVmapTransform(
+        {batched_x, batched_y},
+        {x.view(expected_size), y.view(expected_size)});
+  }
+}
+
+TEST(VmapTest, TestMultiBatchVmapTransformMultipleTensors) {
+  // Test with three (all batched) tensors
+  {
+    int64_t B0 = 5, B1 = 7, B2 = 9;
+    Tensor x = at::randn({2, B0, 3, B1});
+    Tensor y = at::randn({B1, 4});
+    Tensor z = at::randn({2, B2});
+    Tensor batched_x = makeBatched(x, {{/*lvl*/0, /*dim*/1}, {/*lvl*/1, /*dim*/3}});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/1, /*dim*/0}});
+    Tensor batched_z = makeBatched(z, {{/*lvl*/2, /*dim*/1}});
+
+    checkMultiBatchVmapTransform(
+        {batched_x, batched_y, batched_z},
+        {
+          at::movedim(x, {1, 3}, {0, 1}).view({B0, B1, 1, 2, 3}).expand({B0, B1, B2, 2, 3}),
+          y.view({1, B1, 1, 4}).expand({B0, B1, B2, 4}),
+          z.t().view({1, 1, B2, 2}).expand({B0, B1, B2, 2}),
+        });
+  }
+  // Test with three tensors, some batched, some unbatched
+  {
+    int64_t B0 = 5, B1 = 7, B2 = 9;
+    Tensor x = at::randn({2, 3});
+    Tensor y = at::randn({4, B0});
+    Tensor z = at::randn({B1, 2, B2});
+    Tensor batched_y = makeBatched(y, {{/*lvl*/0, /*dim*/1}});
+    Tensor batched_z = makeBatched(z, {{/*lvl*/1, /*dim*/0}, {/*lvl*/2, /*dim*/2}});
+
+    checkMultiBatchVmapTransform(
+        {x, batched_y, batched_z},
+        {
+          x.view({1, 1, 1, 2, 3}).expand({B0, B1, B2, 2, 3}),
+          y.t().view({B0, 1, 1, 4}).expand({B0, B1, B2, 4}),
+          z.permute({0, 2, 1}).view({1, B1, B2, 2}).expand({B0, B1, B2, 2}),
+        });
+  }
+}
+
+
 } // namespace
