
Commit a08db72 (merge of 2 parents: afc94e1 + a8e8f6f)

13 files changed: 3,340 additions and 84 deletions


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -5,3 +5,4 @@
 *.swo
 *.swp
 data/FROSTT/*
+venv/*

Makefile

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@ BENCHFLAGS := #"--benchmark-group-by=func"
 IGNORE += taco
 IGNORE_FLAGS := $(addprefix --ignore=, $(IGNORE))
 
+export TACO_TENSOR_PATH = data/
+
 # To group benchmark output by benchmark, use BENCHFLAGS=--benchmark-group-by=func.
 # To additionally group by a parameterized value, add on ",param:<paramname>" to the
 # command above.
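
The exported TACO_TENSOR_PATH is read at import time by numpy/util.py (added later in this commit); a minimal sketch of that contract, with the value it receives when the benchmarks are launched through make:

    import os

    # numpy/util.py errors out (KeyError) if the variable is unset, so the
    # benchmarks are expected to run via make or with the variable exported by hand.
    TENSOR_PATH = os.environ['TACO_TENSOR_PATH']  # "data/" when launched through make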

download_frostt.sh

Lines changed: 4 additions & 5 deletions
@@ -21,11 +21,10 @@ mkdir -p data/FROSTT
 for i in ${!TENSOR_URLS[@]}; do
     name=${TENSOR_NAMES[$i]}
     url=${TENSOR_URLS[$i]}
-    outdir="data/FROSTT/$name"
-    if [ -d "$outdir" ]; then
+    out="data/FROSTT/$name.tns"
+    if [ -f "$out" ]; then
         continue
     fi
-    echo "Downloading tensor $name to $outdir"
-    mkdir "$outdir"
-    curl $url | gzip -d -c > "$outdir/tensor.frostt"
+    echo "Downloading tensor $name to $out"
+    curl $url | gzip -d -c > "$out"
 done
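
Each downloaded file is a FROSTT coordinate list (one line per nonzero: 1-based coordinates followed by the value), now stored directly as data/FROSTT/<name>.tns so that the glob in numpy/util.py picks it up. A minimal sketch of loading one such file with the helper added in this commit (the tensor name "example" is hypothetical):

    from util import TnsFileLoader

    # Returns the inferred dimensions, 0-based coordinates (one list per mode),
    # and the values for each nonzero entry.
    dims, coords, values = TnsFileLoader().load("data/FROSTT/example.tns")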

download_suitesparse.sh

Lines changed: 2861 additions & 0 deletions
Large diffs are not rendered by default.

numpy/conftest.py

Lines changed: 1 addition & 3 deletions
@@ -3,9 +3,7 @@
 def tacoBench(benchmark):
     def f(func):
         # Take statistics based on 10 rounds.
-        benchmark.pedantic(func, rounds=10, iterations=5)
-        # How do i set please use 10 rounds...
-        # benchmark(func)
+        benchmark.pedantic(func, rounds=10, iterations=1, warmup_rounds=1)
     return f
 
 def pytest_addoption(parser):
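
With this change every benchmark performs one warmup round followed by 10 measured rounds of a single iteration each. A minimal sketch of how the fixture is consumed (bench_example and its body are hypothetical; the real benchmarks in numpy/ufuncs.py follow the same shape):

    def bench_example(tacoBench):
        def bench():
            return sum(range(1000))  # stand-in for the kernel being measured
        tacoBench(bench)  # 1 warmup round, then 10 rounds of 1 iteration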

numpy/ufuncs.py

Lines changed: 14 additions & 0 deletions
@@ -2,6 +2,7 @@
 from scipy.sparse import random, csr_matrix
 import sparse
 import pytest
+from util import TensorCollectionFROSTT, PydataTensorShifter
 
 # TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when
 # given this ufunc to evaluate.
@@ -85,3 +86,16 @@ def bench():
         return C
     tacoBench(bench)
     print("Result", bench())
+
+# Run benchmarks against the FROSTT collection.
+FROSTTTensors = TensorCollectionFROSTT()
+@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames())
+def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor):
+    frTensor = tensor.load()
+    shifter = PydataTensorShifter()
+    other = shifter.shiftLastMode(frTensor).astype('int64')
+    def bench():
+        # TODO (rohany): Expand this test beyond ldexp.
+        c = numpy.ldexp(frTensor, other)
+        return c
+    tacoBench(bench)
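
A hedged sketch of running just this new FROSTT benchmark through pytest's Python API, assuming the repository's pytest configuration collects the bench_* functions (equivalent to passing -k on the command line):

    import pytest

    # Every .tns file under data/FROSTT becomes one parametrized case, with ids
    # taken from getTensorNames(); -k narrows collection to the benchmark above.
    pytest.main(["numpy/ufuncs.py", "-k", "bench_pydata_frostt_ufunc_sparse"])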

numpy/util.py

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
+import scipy.sparse
+import sparse
+import os
+import glob
+
+# Get the path to the directory holding random tensors. Error out
+# if this isn't set.
+TENSOR_PATH = os.environ['TACO_TENSOR_PATH']
+
+# TnsFileLoader loads a tensor stored in .tns format.
+class TnsFileLoader:
+    def __init__(self):
+        pass
+
+    def load(self, path):
+        coordinates = []
+        values = []
+        dims = []
+        first = True
+        with open(path, 'r') as f:
+            for line in f:
+                data = line.split(' ')
+                if first:
+                    first = False
+                    dims = [0] * (len(data) - 1)
+                    for i in range(len(data) - 1):
+                        coordinates.append([])
+
+                for i in range(len(data) - 1):
+                    coordinates[i].append(int(data[i]) - 1)
+                    dims[i] = max(dims[i], coordinates[i][-1] + 1)
+                # TODO (rohany): What if we want this to be an integer?
+                values.append(float(data[-1]))
+        return dims, coordinates, values
+
+# TnsFileDumper dumps a dictionary of coordinates to values
+# into a coordinate list tensor file.
+class TnsFileDumper:
+    def __init__(self):
+        pass
+
+    def dump_dict_to_file(self, shape, data, path):
+        # Sort the data so that the output is deterministic.
+        sorted_data = sorted([list(coords) + [value] for coords, value in data.items()])
+        with open(path, 'w+') as f:
+            for line in sorted_data:
+                coords = [str(elem + 1) for elem in line[:len(line) - 1]]
+                strings = coords + [str(line[-1])]
+                f.write(" ".join(strings))
+                f.write("\n")
+
+# ScipySparseTensorLoader loads a sparse tensor from a file into a
+# scipy.sparse CSR matrix.
+class ScipySparseTensorLoader:
+    def __init__(self, format):
+        self.loader = TnsFileLoader()
+        self.format = format
+
+    def load(self, path):
+        dims, coords, values = self.loader.load(path)
+        if self.format == "csr":
+            return scipy.sparse.csr_matrix((values, (coords[0], coords[1])), shape=tuple(dims))
+        elif self.format == "csc":
+            return scipy.sparse.csc_matrix((values, (coords[0], coords[1])), shape=tuple(dims))
+        else:
+            assert(False)
+
+# PydataSparseTensorLoader loads a sparse tensor from a file into
+# a pydata.sparse tensor.
+class PydataSparseTensorLoader:
+    def __init__(self):
+        self.loader = TnsFileLoader()
+
+    def load(self, path):
+        dims, coords, values = self.loader.load(path)
+        return sparse.COO(coords, values, tuple(dims))
+
+# construct_random_tensor_key constructs a unique key that represents
+# a random tensor parameterized by the chosen shape and sparsity.
+# The key itself is formatted by the dimensions, followed by the
+# sparsity. For example, a 250 by 250 tensor with sparsity 0.01
+# would have a key of 250x250-0.01.tns.
+def construct_random_tensor_key(shape, sparsity):
+    path = TENSOR_PATH
+    dims = "x".join([str(dim) for dim in shape])
+    key = "{}-{}.tns".format(dims, sparsity)
+    return os.path.join(path, "random", key)
+
+# RandomPydataSparseTensorLoader should be used to generate
+# random pydata.sparse tensors. It caches the loaded tensors
+# in the file system so that TACO benchmarks using tensors
+# with the same parameters can use the exact same tensors.
+class RandomPydataSparseTensorLoader:
+    def __init__(self):
+        self.loader = PydataSparseTensorLoader()
+
+    def random(self, shape, sparsity):
+        key = construct_random_tensor_key(shape, sparsity)
+        # If a tensor with these properties exists already, then load it.
+        if os.path.exists(key):
+            return self.loader.load(key)
+        else:
+            # Otherwise, we must create a random tensor with the desired properties,
+            # dump it to the output file, then return it.
+            result = sparse.random(shape, density=sparsity)
+            dok = sparse.DOK(result)
+            TnsFileDumper().dump_dict_to_file(shape, dok.data, key)
+            return result
+
+# RandomScipySparseTensorLoader is the same as RandomPydataSparseTensorLoader
+# but for scipy.sparse tensors.
+class RandomScipySparseTensorLoader:
+    def __init__(self, format):
+        self.loader = ScipySparseTensorLoader(format)
+        self.format = format
+
+    def random(self, shape, sparsity):
+        assert(len(shape) == 2)
+        key = construct_random_tensor_key(shape, sparsity)
+        # If a tensor with these properties exists already, then load it.
+        if os.path.exists(key):
+            return self.loader.load(key)
+        else:
+            # Otherwise, create and then dump a tensor.
+            result = scipy.sparse.random(shape[0], shape[1], density=sparsity, format=self.format)
+            dok = scipy.sparse.dok_matrix(result)
+            TnsFileDumper().dump_dict_to_file(shape, dict(dok.items()), key)
+            return result
+
+# FROSTTTensor represents a tensor in the FROSTT dataset.
+class FROSTTTensor:
+    def __init__(self, path):
+        self.path = path
+
+    def __str__(self):
+        f = os.path.split(self.path)[1]
+        return f.replace(".tns", "")
+
+    def load(self):
+        return PydataSparseTensorLoader().load(self.path)
+
+# TensorCollectionFROSTT represents the set of all FROSTT tensors.
+class TensorCollectionFROSTT:
+    def __init__(self):
+        data = os.path.join(TENSOR_PATH, "FROSTT")
+        frostttensors = glob.glob(os.path.join(data, "*.tns"))
+        self.tensors = [FROSTTTensor(t) for t in frostttensors]
+
+    def getTensors(self):
+        return self.tensors
+    def getTensorNames(self):
+        return [str(tensor) for tensor in self.getTensors()]
+
+# PydataTensorShifter shifts all elements in the last mode
+# of the input pydata/sparse tensor by one.
+class PydataTensorShifter:
+    def __init__(self):
+        pass
+
+    def shiftLastMode(self, tensor):
+        coords = tensor.coords
+        data = tensor.data
+        resultCoords = []
+        for j in range(len(tensor.shape)):
+            resultCoords.append([0] * len(data))
+        resultValues = [0] * len(data)
+        for i in range(len(data)):
+            for j in range(len(tensor.shape)):
+                resultCoords[j][i] = coords[j][i]
+            resultValues[i] = data[i]
+            resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1]
+        return sparse.COO(resultCoords, resultValues, tensor.shape)
+
+# ScipyTensorShifter shifts all elements in the last mode
+# of the input scipy/sparse tensor by one.
+class ScipyTensorShifter:
+    def __init__(self, format):
+        self.format = format
+
+    def shiftLastMode(self, tensor):
+        dok = scipy.sparse.dok_matrix(tensor)
+        result = scipy.sparse.dok_matrix(tensor.shape)
+        for coord, val in dok.items():
+            newCoord = list(coord[:])
+            newCoord[-1] = (newCoord[-1] + 1) % tensor.shape[-1]
+            result[tuple(newCoord)] = val
+        if self.format == "csr":
+            return scipy.sparse.csr_matrix(result)
+        elif self.format == "csc":
+            return scipy.sparse.csc_matrix(result)
+        else:
+            assert(False)
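
A minimal usage sketch of how the helpers in util.py fit together, assuming TACO_TENSOR_PATH is set (e.g. to data/ via the Makefile) and the random/ subdirectory exists; the shape and sparsity values are illustrative:

    from util import RandomPydataSparseTensorLoader, PydataTensorShifter

    loader = RandomPydataSparseTensorLoader()
    # The first call generates and dumps data/random/250x250-0.01.tns; later calls
    # reload that file, so every benchmark sees the same random tensor.
    A = loader.random((250, 250), 0.01)

    # Shift each nonzero by one position in the last mode (mod the dimension) to
    # build a second operand with a related but different sparsity pattern.
    B = PydataTensorShifter().shiftLastMode(A)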
