implemented parts of reader

junikimm717 · junikimm717 · commit 83e22b504311 · 2025-03-30T12:39:44.000-04:00
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,12 @@
+# SPDX-FileCopyrightText: 2024 Binsparse Developers
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+---
+BasedOnStyle: LLVM
+PointerAlignment: Left
+ColumnLimit: 80
+AlwaysBreakTemplateDeclarations: Yes
+AllowShortFunctionsOnASingleLine: Empty
+SpaceAfterCStyleCast: true
+---
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+scripts
+venv
+build
+._*
+tensor_test_files
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,69 @@
+cmake_minimum_required(VERSION 3.5) # must come first, before project()/include()
+project(bsp-to-taco)
+
+include(FetchContent) # NOTE: FetchContent_MakeAvailable (used below) needs CMake >= 3.14
+
+add_library(bsp-to-taco SHARED)
+
+cmake_policy(SET CMP0079 NEW)
+
+set(CMAKE_C_STANDARD 11)
+
+#set(CMAKE_CXX_STANDARD 20)
+
+#set(CMAKE_C_FLAGS "-O3 -march=native")
+set(CMAKE_CXX_FLAGS "-O3 -march=native -Wall -Wno-narrowing -fPIC")
+
+add_subdirectory(include)
+add_subdirectory(src)
+add_subdirectory(examples)
+
+#### binsparse reference implementation configuration
+
+FetchContent_Declare(
+  binsparse
+  GIT_REPOSITORY https://github.com/junikimm717/binsparse-reference-c
+  GIT_TAG main
+)
+FetchContent_MakeAvailable(binsparse)
+target_include_directories(bsp-to-taco PUBLIC ${binsparse_SOURCE_DIR}/include)
+
+#### taco configuration
+
+option(CUDA "Build for NVIDIA GPU (CUDA must be preinstalled)" OFF)
+option(PYTHON "Build TACO for python environment" OFF)
+option(OPENMP "Build with OpenMP execution support" OFF)
+option(COVERAGE "Build with code coverage analysis" OFF)
+set(TACO_FEATURE_CUDA 0)
+set(TACO_FEATURE_OPENMP 0)
+set(TACO_FEATURE_PYTHON 0)
+if(CUDA)
+  message("-- Searching for CUDA Installation")
+  find_package(CUDA REQUIRED)
+  add_definitions(-DCUDA_BUILT)
+  set(TACO_FEATURE_CUDA 1)
+endif(CUDA)
+if(OPENMP)
+  message("-- Will use OpenMP for parallel execution")
+  add_definitions(-DUSE_OPENMP)
+  set(TACO_FEATURE_OPENMP 1)
+endif(OPENMP)
+if(PYTHON)
+  message("-- Will build Python extension")
+  add_definitions(-DPYTHON)
+  set(TACO_FEATURE_PYTHON 1)
+endif(PYTHON)
+
+FetchContent_Declare(
+  taco
+  GIT_REPOSITORY https://github.com/junikimm717/taco
+  GIT_TAG master
+)
+FetchContent_MakeAvailable(taco)
+configure_file(${taco_SOURCE_DIR}/include/taco/version.h.in ${taco_SOURCE_DIR}/include/taco/version.h @ONLY)
+target_include_directories(${PROJECT_NAME} PUBLIC ${taco_SOURCE_DIR}/include)
+
+#### Project Configuration
+
+target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_BINARY_DIR}/include)
+target_link_libraries(${PROJECT_NAME} PUBLIC binsparse-rc taco)
diff --git a/compile_flags.txt b/compile_flags.txt
@@ -0,0 +1,6 @@
+-I./include
+-I./build/include
+-DBSP_USE_HDF5
+-I/usr/include/hdf5/serial
+-I./build/_deps/taco-src/include
+-I./build/_deps/binsparse-src/include
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: 2024 Binsparse Developers
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+function(add_example target) # target: basename of the example's .cpp source
+  add_executable(${target} ${target}.cpp)
+  target_link_libraries(${target} bsp-to-taco)
+endfunction()
+
+add_example(taco_experiments)
+add_example(parse_taco)
diff --git a/examples/parse_taco.cpp b/examples/parse_taco.cpp
@@ -0,0 +1,73 @@
+#include <binsparse/tensor.h>
+#include <binsparse/read_tensor.h>
+#include <binsparse/write_tensor.h>
+#include <cstdio>
+#include <iostream>
+#include "bsp_to_taco.hpp"
+using namespace std;
+
+// Example driver: reads the binsparse tensor stored in argv[1], converts it
+// with makeTacoTensor, and prints its index arrays, a dense preview of the
+// top-left corner, its dimensions and its stored values.
+// Returns 1 (after printing usage) when no file name is given, 0 otherwise.
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    fprintf(stderr, "usage: ./parse_taco [file_name.h5]\n");
+    return 1;
+  }
+  auto bsp = bsp_read_tensor(argv[1], NULL);
+  taco::Tensor<double> tensor = makeTacoTensor(bsp);
+
+  // NOTE(review): pack() is left disabled because it appeared to clobber the
+  // storage set up by makeTacoTensor -- confirm before re-enabling.
+  // tensor.pack();
+
+  const auto& dims = tensor.getDimensions();
+  auto index = tensor.getStorage().getIndex();
+  cout << "dimension:" << dims.size() << "\n";
+  for (int i = 0; i < index.numModeIndices(); i++) {
+    auto modeIndex = index.getModeIndex(i);
+    cout << "mode index " << i
+         << ", indexArrays: " << modeIndex.numIndexArrays()
+         << "\n====\n";
+    for (int j = 0; j < modeIndex.numIndexArrays(); j++) {
+      auto array = modeIndex.getIndexArray(j);
+      // NOTE(review): reads every entry as int64 -- assumes makeTacoTensor
+      // builds 64-bit index arrays; confirm.
+      for (size_t k = 0; k < array.getSize(); k++) {
+        cout << array.get(k).get().int64Value << " ";
+      }
+      cout << "\n";
+    }
+    cout << "====\n";
+  }
+
+  cout << "===================\n";
+  cout << "Extract some values: \n";
+  // Preview at most a 10x10 corner, clamped to the real extents so a small
+  // tensor is not indexed out of range; the two-index access needs a 2-D one.
+  if (dims.size() == 2) {
+    const int rows = dims[0] < 10 ? dims[0] : 10;
+    const int cols = dims[1] < 10 ? dims[1] : 10;
+    for (int k = 0; k < rows; k++) {
+      for (int i = 0; i < cols; i++) {
+        cout << tensor(k, i) << " ";
+      }
+      cout << "\n\n";
+    }
+  }
+  cout << "===================\n";
+
+  cout << "dims:\n";
+  for (const auto& x : dims) cout << x << " ";
+  cout << "\nvalues:\n";
+  auto vals = tensor.getStorage().getValues();
+  for (size_t i = 0; i < vals.getSize(); i++) {
+    cout << vals.get(i).get().float64Value << " ";
+  }
+  cout << "\n";
+  // TODO(review): bsp is never released. The destroy call stays disabled
+  // until it is confirmed that tensor does not alias bsp's buffers.
+  //bsp_destroy_tensor_t(bsp);
+  return 0;
+}
diff --git a/examples/taco_experiments.cpp b/examples/taco_experiments.cpp
@@ -0,0 +1,82 @@
+#include <iostream>
+#include <taco.h>
+
+using namespace taco;
+using namespace std;
+
+// Scratch program: builds a small 2-D tensor in a COO-like format
+// (Compressed, Singleton), packs it, and prints the resulting index arrays,
+// a dense view of every cell and the raw value storage. Always returns 0.
+int main() {
+  const int dim1 = 5, dim2 = 5;
+
+  // Define a 2D tensor in COO format.
+  //
+  // NOTE: the shorthand `Format coo({Compressed, Singleton});` appeared not to
+  // work, so the per-mode properties are spelled out below. For reference,
+  // `Format fmt = COO(2, false, true, false);` means order 2, isUnique false,
+  // isOrdered true, isAOS false; modeOrdering seems to determine the order.
+  // Another format that was tried:
+  //   Format fmt({Dense({ModeFormat::ORDERED, ModeFormat::UNIQUE}),
+  //               Compressed({ModeFormat::ORDERED, ModeFormat::UNIQUE})});
+  Format fmt({Compressed({ModeFormat::ORDERED, ModeFormat::NOT_UNIQUE}),
+              Singleton({ModeFormat::ORDERED, ModeFormat::NOT_UNIQUE})});
+  Tensor<double> A({dim1, dim2}, fmt);
+
+  // TODO: generate with finch, use experimental parse_taco.cpp and check the
+  // tensor below is the same.
+
+  // Insert non-zero values with explicit coordinates
+  A.insert({0, 0}, 3.5);
+  A.insert({1, 2}, 4.2);
+  A.insert({1, 1}, 1.1);
+  A.insert({2, 3}, 1.4);
+
+  A.pack();
+
+  auto index = A.getStorage().getIndex();
+
+  cout << "elements " << A.getAllocSize() << "\n";
+
+  cout << "dimension:" << A.getDimensions().size() << "\n";
+  for (int i = 0; i < index.numModeIndices(); i++) {
+    auto modeIndex = index.getModeIndex(i);
+    cout << "mode index " << i
+         << ", indexArrays: " << modeIndex.numIndexArrays()
+         << "\n====\n";
+    for (int j = 0; j < modeIndex.numIndexArrays(); j++) {
+      auto array = modeIndex.getIndexArray(j);
+      for (size_t k = 0; k < array.getSize(); k++) {
+        cout << array.get(k).get().int32Value << " ";
+      }
+      cout << "\n";
+    }
+    cout << "====\n";
+  }
+
+  cout << "===================\n";
+  cout << "Extract some values: \n";
+  for (int k = 0; k < dim1; k++) {
+    for (int i = 0; i < dim2; i++) {
+      cout << A(k, i) << " ";
+    }
+    cout << "\n\n";
+  }
+  cout << "===================\n";
+
+  cout << "elements:\n";
+  auto elements = A.getStorage().getValues();
+  for (size_t i = 0; i < elements.getSize(); i++) {
+    cout << elements.get(i).get().float64Value << " ";
+  }
+  cout << "\n";
+  cout << "data: \n";
+  // Same array again, read through the raw buffer instead of the accessor.
+  auto data = static_cast<const double*>(elements.getData());
+  for (size_t i = 0; i < elements.getSize(); i++) {
+    cout << data[i] << " ";
+  }
+  cout << "\n";
+  cout << endl;
+  return 0;
+}
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
@@ -0,0 +1,5 @@
+# SPDX-FileCopyrightText: 2024 Binsparse Developers
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+target_include_directories(bsp-to-taco PUBLIC .)
diff --git a/include/bsp_to_taco.hpp b/include/bsp_to_taco.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <string>
+
+#include <binsparse/tensor.h>
+#include <binsparse/read_tensor.h>
+#include <taco.h>
+
+// Builds a taco tensor from an already-loaded binsparse tensor.
+taco::TensorBase makeTacoTensor(bsp_tensor_t& tensor);
+
+// Presumably loads the binsparse file `filename` as a taco tensor -- confirm.
+taco::TensorBase readBinSparse(std::string filename);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2024 Binsparse Developers
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+target_sources(bsp-to-taco PRIVATE
+  src/bsp_to_taco.cpp
+)
diff --git a/src/bsp_to_taco.cpp b/src/bsp_to_taco.cpp

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +scripts
 +venv
 +build
 +._*
 +tensor_test_files