successfully implemented e2e tests

junikimm717 · junikimm717 · commit 316d1b0d4ec7 · 2025-04-01T11:29:59.000-04:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -16,7 +16,6 @@ set(CMAKE_CXX_FLAGS "-O3 -march=native -Wall -Wno-narrowing -fPIC")
 
 add_subdirectory(include)
 add_subdirectory(src)
-add_subdirectory(examples)
 
 #### binsparse reference implementation configuration
 
@@ -67,3 +66,8 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${taco_SOURCE_DIR}/include)
 
 target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_BINARY_DIR}/include)
 target_link_libraries(${PROJECT_NAME} PUBLIC binsparse-rc taco)
+
+# Examples and tests are only added when this project is the top-level
+# project of the build (its name equals the name of the outermost project),
+# so a parent project that embeds this one does not build them.
+if(PROJECT_NAME STREQUAL CMAKE_PROJECT_NAME)
+  add_subdirectory(examples)
+  add_subdirectory(test)
+endif()
diff --git a/dev.sh b/dev.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env sh
+
+CONTAINER='bsp-to-taco'
+IMAGE='docker.io/junikimm717/nvim2025:finch'
+DIR="$(realpath "$(dirname "$0")")"
+
+case "$1" in
+  pull|p)
+    podman pull "$IMAGE"
+    (podman container ls | grep "$CONTAINER" > /dev/null 2>&1) && podman container rm -fv "$CONTAINER"
+    ;;
+  clear|c)
+    (podman container ls -a | grep "$CONTAINER" > /dev/null 2>&1)\
+    && {
+      podman container kill "$CONTAINER" > /dev/null 2>&1;
+      podman container rm "$CONTAINER"
+    }
+    ;;
+  *)
+    set +x
+    if ! (podman container ls -a | grep "$CONTAINER" > /dev/null 2>&1); then
+      podman run\
+        -dt\
+        --name "$CONTAINER"\
+        --group-add keep-groups\
+        -v "$DIR:/workspace"\
+        --privileged\
+        --rm\
+        "$IMAGE"
+    fi || exit 1
+    podman exec\
+      -e ENV=/root/.profile\
+      -it "$CONTAINER" /bin/bash
+    ;;
+esac
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -9,3 +9,4 @@ endfunction()
 
 add_example(taco_experiments)
 add_example(parse_taco)
+add_example(tensor_test)
diff --git a/examples/parse_taco.cpp b/examples/parse_taco.cpp
@@ -25,7 +25,7 @@ int main(int argc, char** argv) {
     for (int j = 0; j < index.getModeIndex(i).numIndexArrays(); j++) {
       auto array = index.getModeIndex(i).getIndexArray(j);
       for (int k = 0; k < array.getSize(); k++) {
-        cout << array.get(k).get().int64Value << " ";
+        cout << array.get(k).get().int32Value << " ";
       }
       cout << "\n";
     }
@@ -50,6 +50,5 @@ int main(int argc, char** argv) {
   for (int i = 0; i < vals.getSize(); i++) std::cout << vals.get(i).get().float64Value << " ";
   cout << "\n";
   auto format = tensor.getStorage().getFormat();
-  //bsp_destroy_tensor_t(bsp);
   return 0;
 }
diff --git a/examples/taco_experiments.cpp b/examples/taco_experiments.cpp
@@ -26,16 +26,16 @@ int main() {
 
   Format fmt({Compressed({ModeFormat::ORDERED, ModeFormat::NOT_UNIQUE}),
               Singleton({ModeFormat::ORDERED, ModeFormat::NOT_UNIQUE})});
-  Tensor<double> A({5, 5}, fmt);
+  Tensor<double> A({4, 2}, fmt);
 
   // TODO: generate with finch, use experimental parse_taco.cpp and check the
   // tensor below is the same.
 
   // Insert non-zero values with explicit coordinates
-  A.insert({0, 0}, 3.5);
-  A.insert({1, 2}, 4.2);
-  A.insert({1, 1}, 1.1);
-  A.insert({2, 3}, 1.4);
+  A.insert({0, 1}, 1.0);
+  A.insert({1, 0}, 1.0);
+  A.insert({3, 0}, 3.0);
+  A.insert({3, 1}, 4.0);
 
   A.pack();
 
@@ -60,8 +60,8 @@ int main() {
 
   cout << "===================\n";
   cout << "Extract some values: \n";
-  for (int k = 0; k < 5; k++) {
-    for (int i = 0; i < 5; i++) {
+  for (int k = 0; k < 4; k++) {
+    for (int i = 0; i < 2; i++) {
       cout << A(k, i) << " ";
     }
     cout << "\n\n";
diff --git a/examples/tensor_test.cpp b/examples/tensor_test.cpp
@@ -0,0 +1,47 @@
+#include <binsparse/tensor.h>
+#include <binsparse/read_tensor.h>
+#include <binsparse/write_tensor.h>
+#include "bsp_to_taco.hpp"
+#include "taco/tensor.h"
+#include "taco_to_bsp.hpp"
+
+using namespace std;
+
+/*
+Round-trip driver: reads a binsparse tensor from argv[1], converts it to a
+taco tensor and back to a binsparse tensor, prints the kind of every level
+above the element level, and writes the result to argv[2].
+
+Returns 0 on success and 1 on a usage error or when the converted tensor
+contains a level kind this program does not know.
+*/
+int main(int argc, char** argv) {
+  if (argc < 3) {
+    fprintf(stderr, "usage: ./tensor_test [file_name.h5] [output_file_name.h5]\n");
+    return 1;
+  }
+  char* input_path = argv[1];
+  char* output_path = argv[2];
+
+  // makeTacoTensor is documented as consuming the tensor it is given, so the
+  // original is not destroyed here; `tensor` is simply rebound to the tensor
+  // produced by the conversion back from taco.
+  bsp_tensor_t tensor = bsp_read_tensor(input_path, NULL);
+  taco::TensorBase taco = makeTacoTensor(tensor);
+  tensor = makeBspTensor(taco);
+
+  // Walk the level chain until the element level is reached.
+  bsp_level_t* curLevel = tensor.level;
+  while (curLevel->kind != BSP_TENSOR_ELEMENT) {
+    switch (curLevel->kind) {
+      case BSP_TENSOR_DENSE: {
+        cout << "dense layer!" << endl;
+        bsp_dense_t* data = (bsp_dense_t*) curLevel->data;
+        curLevel = data->child;
+        break;
+      }
+      case BSP_TENSOR_SPARSE: {
+        cout << "sparse layer!" << endl;
+        bsp_sparse_t* data = (bsp_sparse_t*) curLevel->data;
+        curLevel = data->child;
+        break;
+      }
+      default: {
+        // Without this branch an unexpected kind never advances curLevel
+        // and the loop spins forever.
+        fprintf(stderr, "tensor_test: unexpected level kind %d\n",
+                (int) curLevel->kind);
+        return 1;
+      }
+    }
+  }
+
+  bsp_write_tensor(output_path, tensor, NULL, NULL, 9);
+  bsp_destroy_tensor_t(tensor);
+  return 0;
+}
diff --git a/include/taco_to_bsp.hpp b/include/taco_to_bsp.hpp
@@ -0,0 +1,6 @@
+#include "taco/tensor.h"
+#include <binsparse/tensor.h>
+#include <binsparse/read_tensor.h>
+#include <taco.h>
+
+// Builds a binsparse tensor from a taco tensor. The implementation in
+// src/taco_to_bsp.cpp copies the taco index and value buffers into newly
+// allocated binsparse arrays; examples/tensor_test.cpp releases the result
+// with bsp_destroy_tensor_t.
+bsp_tensor_t makeBspTensor(taco::TensorBase tacoTensor);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -4,4 +4,5 @@
 
 target_sources(bsp-to-taco PRIVATE
   src/bsp_to_taco.cpp
+  src/taco_to_bsp.cpp
 )
diff --git a/src/bsp_to_taco.cpp b/src/bsp_to_taco.cpp
@@ -8,8 +8,6 @@
 #include "taco/storage/storage.h"
 #include "taco/tensor.h"
 #include "taco/type.h"
-#include <algorithm>
-
 
 static std::vector<int> getDimensions(bsp_tensor_t& tensor) {
   std::vector<int> dims(tensor.rank);
@@ -49,15 +47,15 @@ static inline taco::Datatype getTacoDataType(bsp_array_t& array) {
   case BSP_COMPLEX_FLOAT64:
     return taco::Complex128;
   default:
-    taco_ierror;
+    taco_uerror << "Unsupported type supplied to taco converter";
     return taco::Float64;
   }
 }
 
 static taco::Array bspToTacoArray(bsp_array_t& array) {
   taco::Datatype dataType = getTacoDataType(array);
   bsp_type_t type = array.type;
-  taco::Array res = taco::makeArray(getTacoDataType(array), array.size);
+  taco::Array res = taco::makeArray(dataType, array.size);
   // eventually, get rid of memcpy?
   memcpy(res.getData(), array.data, array.size * dataType.getNumBytes());
   return res;
@@ -91,9 +89,7 @@ static taco::Format createTacoFormat(bsp_tensor_t& tensor) {
     std::vector<int> modeOrdering(tensor.rank);
     for (int i = 0; i < tensor.rank; i++) {
       modeOrdering[i] = tensor.transpose[i];
-      std::cout << tensor.transpose[i] << " ";
     }
-    std::cout << " size: " << modeTypes.size() << "\n";
     return taco::Format(modeTypes, modeOrdering);
   }
   return taco::Format(modeTypes);
@@ -110,22 +106,28 @@ static taco::Index createTacoIndex(bsp_tensor_t& tensor, taco::Format& format) {
       level = ((bsp_dense_t*) level->data)->child;
       break;
     }
-    // eventually, this should probably not be UserOwns.
     case BSP_TENSOR_SPARSE: {
       bsp_sparse_t* data = (bsp_sparse_t*) level->data;
+      if (data->pointers_to != NULL && data->pointers_to->type != BSP_INT32) {
+        taco_uerror << "pointers_to just be an int32 type to interface "
+                    << "properly with taco!";
+      }
       modeIndices.push_back(taco::ModeIndex({
           data->pointers_to != NULL
               ? taco::Array(getTacoDataType(*data->pointers_to),
                             data->pointers_to->data, data->pointers_to->size,
                             taco::Array::Free)
               : taco::makeArray({
-                    (int64_t) 0,
-                    (int64_t) data->indices[0].size,
+                    (int) 0,
+                    (int) data->indices[0].size,
                 }),
           taco::Array(getTacoDataType(data->indices[0]), data->indices[0].data,
                       data->indices[0].size, taco::Array::Free),
       }));
       for (int i = 1; i < data->rank; i++) {
+        if (data->indices[i].type != BSP_INT32)
+          taco_terror << "indices just be an int32 type to interface properly "
+                      << "with taco!";
         modeIndices.push_back(taco::ModeIndex({
             taco::makeArray(getTacoDataType(data->indices[i]), 0),
             taco::Array(getTacoDataType(data->indices[i]),
@@ -143,6 +145,10 @@ static taco::Index createTacoIndex(bsp_tensor_t& tensor, taco::Format& format) {
   return taco::Index(format, modeIndices);
 }
 
+/*
+Creates a taco object from a bsp tensor.
+Note that this function **consumes** the bsp tensor object!
+*/
 taco::TensorBase makeTacoTensor(bsp_tensor_t& tensor) {
   bsp_level_t* level = tensor.level;
 
@@ -151,7 +157,7 @@ taco::TensorBase makeTacoTensor(bsp_tensor_t& tensor) {
   taco::Index tacoIndex = createTacoIndex(tensor, tacoFormat);
   taco::TensorBase tacoTensor(getTacoDataType(values), getDimensions(tensor),
                               tacoFormat);
-  //tacoTensor.setNeedsPack(false);
+  // tacoTensor.setNeedsPack(false);
   auto storage = tacoTensor.getStorage();
   storage.setIndex(tacoIndex);
   storage.setValues(bspToTacoArray(values));
diff --git a/src/taco_to_bsp.cpp b/src/taco_to_bsp.cpp
@@ -0,0 +1,138 @@
+#include "binsparse/types.h"
+#include "taco/format.h"
+#include <binsparse/tensor.h>
+#include <binsparse/write_tensor.h>
+#include <taco.h>
+
+/*
+Maps a taco datatype onto the corresponding binsparse type tag.
+(Despite the name, the result is the *binsparse* type for a taco type.)
+
+Unsupported types are reported through taco_uerror. The BSP_INVALID_TYPE
+return after it is presumably never reached because taco_uerror does not
+return normally -- NOTE(review): confirm.
+*/
+static inline bsp_type_t getTacoDataType(taco::Datatype type) {
+  if (type == taco::UInt8)
+    return BSP_UINT8;
+  else if (type == taco::UInt16)
+    return BSP_UINT16;
+  else if (type == taco::UInt32)
+    return BSP_UINT32;
+  else if (type == taco::UInt64)
+    return BSP_UINT64;
+  else if (type == taco::Int8)
+    return BSP_INT8;
+  else if (type == taco::Int16)
+    return BSP_INT16;
+  else if (type == taco::Int32)
+    return BSP_INT32;
+  else if (type == taco::Int64)
+    return BSP_INT64;
+  else if (type == taco::Float32)
+    return BSP_FLOAT32;
+  else if (type == taco::Float64)
+    return BSP_FLOAT64;
+  // This branch used to test taco::Int8 a second time, which made it
+  // unreachable; booleans are the taco type that belongs to BSP_BINT8.
+  // NOTE(review): confirm against the BSP_BINT8 case in bsp_to_taco.cpp.
+  else if (type == taco::Bool)
+    return BSP_BINT8;
+  else if (type == taco::Complex64)
+    return BSP_COMPLEX_FLOAT32;
+  else if (type == taco::Complex128)
+    return BSP_COMPLEX_FLOAT64;
+  else {
+    taco_uerror << "Unsupported type supplied to taco converter";
+    return BSP_INVALID_TYPE;
+  }
+}
+
+// Copies a taco index array into a newly constructed BSP_INT32 binsparse
+// array. The taco array must hold Int32 entries (checked with taco_uassert).
+static bsp_array_t makeBspIndexArray(taco::Array arr) {
+  taco_uassert(arr.getType() == taco::Int32);
+  const size_t count = arr.getSize();
+  bsp_array_t copy = bsp_construct_array_t(count, BSP_INT32);
+  const size_t numBytes = taco::Int32.getNumBytes() * count;
+  memcpy(copy.data, arr.getData(), numBytes);
+  return copy;
+}
+
+/*
+Builds a binsparse tensor from a taco tensor.
+
+The mode ordering, the dimensions, the index arrays and the values are all
+copied into newly malloc'ed / bsp_construct_array_t'ed storage.
+
+Level mapping:
+  - a Dense mode becomes a rank-1 BSP_TENSOR_DENSE level;
+  - a Sparse mode together with the run of Singleton modes directly after it
+    becomes one BSP_TENSOR_SPARSE level whose rank is the length of that run
+    plus one. pointers_to is left NULL when the sparse level is the
+    outermost one and is otherwise a copy of the mode's first index array;
+  - the values form the terminating BSP_TENSOR_ELEMENT level.
+
+A mode that starts a level but is neither Dense nor Sparse is reported
+through taco_uerror. Index arrays must be Int32 (see makeBspIndexArray).
+*/
+bsp_tensor_t makeBspTensor(taco::TensorBase tacoTensor) {
+  bsp_tensor_t res = bsp_construct_default_tensor_t();
+  auto storage = tacoTensor.getStorage();
+  auto index = storage.getIndex();
+
+  // copy over the transposes.
+  auto modeOrdering = storage.getFormat().getModeOrdering();
+  res.transpose = (size_t*) malloc(sizeof(size_t) * modeOrdering.size());
+  for (size_t i = 0; i < modeOrdering.size(); i++) {
+    res.transpose[i] = (size_t) modeOrdering[i];
+  }
+
+  // copy over the dimensions.
+  auto dims = storage.getDimensions();
+  res.rank = dims.size();
+  res.dims = (size_t*) malloc(sizeof(size_t) * dims.size());
+  for (size_t i = 0; i < dims.size(); i++) {
+    res.dims[i] = dims[i];
+  }
+
+  std::vector<taco::ModeFormat> formats = storage.getFormat().getModeFormats();
+  const int numModes = (int) dims.size();
+
+  int dimsPtr = 0;
+  res.level = (bsp_level_t*) malloc(sizeof(bsp_level_t));
+  bsp_level_t* curLevel = res.level;
+
+  while (dimsPtr < numModes) {
+    taco::ModeFormat format = formats[dimsPtr];
+
+    if (format.getName() == taco::Sparse.getName()) {
+      // Extend the level over every Singleton mode that directly follows.
+      // Formats are compared by name: taco::Singleton is itself a ModeFormat
+      // object, so a typeid comparison against it can never tell formats
+      // apart.
+      int boundary = dimsPtr + 1;
+      while (boundary < numModes &&
+             formats[boundary].getName() == taco::Singleton.getName()) {
+        boundary++;
+      }
+      curLevel->kind = BSP_TENSOR_SPARSE;
+
+      bsp_sparse_t* data = (bsp_sparse_t*) malloc(sizeof(bsp_sparse_t));
+      curLevel->data = data;
+
+      data->pointers_to = NULL;
+      if (dimsPtr != 0) {
+        data->pointers_to = (bsp_array_t*) malloc(sizeof(bsp_array_t));
+        *data->pointers_to =
+            makeBspIndexArray(index.getModeIndex(dimsPtr).getIndexArray(0));
+      }
+
+      // One coordinate array (index array 1) per mode covered by the level.
+      data->rank = boundary - dimsPtr;
+      data->indices = (bsp_array_t*) malloc(sizeof(bsp_array_t) * data->rank);
+      for (int indicesIdx = 0; indicesIdx < data->rank; indicesIdx++) {
+        data->indices[indicesIdx] = makeBspIndexArray(
+            index.getModeIndex(dimsPtr + indicesIdx).getIndexArray(1));
+      }
+
+      data->child = (bsp_level_t*) malloc(sizeof(bsp_level_t));
+      curLevel = data->child;
+
+      dimsPtr = boundary;
+    } else if (format.getName() == taco::Dense.getName()) {
+      curLevel->kind = BSP_TENSOR_DENSE;
+      bsp_dense_t* data = (bsp_dense_t*) malloc(sizeof(bsp_dense_t));
+      curLevel->data = data;
+
+      data->rank = 1;
+      data->child = (bsp_level_t*) malloc(sizeof(bsp_level_t));
+      curLevel = data->child;
+
+      dimsPtr++;
+    } else {
+      taco_uerror << "This should be impossible; neither dense nor sparse";
+    }
+  }
+
+  // The innermost level holds a copy of the values. (A bsp_element_t that
+  // was allocated here before was never used or freed, so it is gone.)
+  curLevel->kind = BSP_TENSOR_ELEMENT;
+  taco::Array values = storage.getValues();
+  res.nnz = values.getSize();
+
+  bsp_array_t* arr = (bsp_array_t*) malloc(sizeof(bsp_array_t));
+  *arr = bsp_construct_array_t(res.nnz, getTacoDataType(values.getType()));
+  memcpy(arr->data, values.getData(),
+         values.getType().getNumBytes() * values.getSize());
+  curLevel->data = arr;
+
+  return res;
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
diff --git a/test/tensor_test.jl b/test/tensor_test.jl

Original file line number	Diff line number	Diff line change
`@@ -9,3 +9,4 @@ endfunction()`
`9`	`9`
`10`	`10`	`add_example(taco_experiments)`
`11`	`11`	`add_example(parse_taco)`
	`12`	`+add_example(tensor_test)`
Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ int main(int argc, char** argv) {`
`25`	`25`	`for (int j = 0; j < index.getModeIndex(i).numIndexArrays(); j++) {`
`26`	`26`	`auto array = index.getModeIndex(i).getIndexArray(j);`
`27`	`27`	`for (int k = 0; k < array.getSize(); k++) {`
`28`		`- cout << array.get(k).get().int64Value << " ";`
	`28`	`+ cout << array.get(k).get().int32Value << " ";`
`29`	`29`	`}`
`30`	`30`	`cout << "\n";`
`31`	`31`	`}`
`@@ -50,6 +50,5 @@ int main(int argc, char** argv) {`
`50`	`50`	`for (int i = 0; i < vals.getSize(); i++) std::cout << vals.get(i).get().float64Value << " ";`
`51`	`51`	`cout << "\n";`
`52`	`52`	`auto format = tensor.getStorage().getFormat();`
`53`		`- //bsp_destroy_tensor_t(bsp);`
`54`	`53`	`return 0;`
`55`	`54`	`}`
Original file line number	Diff line number	Diff line change
`@@ -4,4 +4,5 @@`
`4`	`4`
`5`	`5`	`target_sources(bsp-to-taco PRIVATE`
`6`	`6`	`src/bsp_to_taco.cpp`
	`7`	`+ src/taco_to_bsp.cpp`
`7`	`8`	`)`