
Commit a08db72 (merge of 2 parents: afc94e1 + a8e8f6f)

13 files changed: 3,340 additions and 84 deletions


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -5,3 +5,4 @@
 *.swo
 *.swp
 data/FROSTT/*
+venv/*

Makefile

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@ BENCHFLAGS := #"--benchmark-group-by=func"
 IGNORE += taco
 IGNORE_FLAGS := $(addprefix --ignore=, $(IGNORE))
 
+export TACO_TENSOR_PATH = data/
+
 # To group benchmark output by benchmark, use BENCHFLAGS=--benchmark-group-by=func.
 # To additionally group by a parameterized value, add on ",param:<paramname>" to the
 # command above.
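
The exported TACO_TENSOR_PATH is read at import time by numpy/util.py (added later in this commit); a minimal sketch of that contract, with the value it receives when the benchmarks are launched through make:

    import os

    # numpy/util.py errors out (KeyError) if the variable is unset, so the
    # benchmarks are expected to run via make or with the variable exported by hand.
    TENSOR_PATH = os.environ['TACO_TENSOR_PATH']  # "data/" when launched through make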

download_frostt.sh

Lines changed: 4 additions & 5 deletions
@@ -21,11 +21,10 @@ mkdir -p data/FROSTT
 for i in ${!TENSOR_URLS[@]}; do
     name=${TENSOR_NAMES[$i]}
     url=${TENSOR_URLS[$i]}
-    outdir="data/FROSTT/$name"
-    if [ -d "$outdir" ]; then
+    out="data/FROSTT/$name.tns"
+    if [ -f "$out" ]; then
         continue
     fi
-    echo "Downloading tensor $name to $outdir"
-    mkdir "$outdir"
-    curl $url | gzip -d -c > "$outdir/tensor.frostt"
+    echo "Downloading tensor $name to $out"
+    curl $url | gzip -d -c > "$out"
 done
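
Each downloaded file is a FROSTT coordinate list (one line per nonzero: 1-based coordinates followed by the value), now stored directly as data/FROSTT/<name>.tns so that the glob in numpy/util.py picks it up. A minimal sketch of loading one such file with the helper added in this commit (the tensor name "example" is hypothetical):

    from util import TnsFileLoader

    # Returns the inferred dimensions, 0-based coordinates (one list per mode),
    # and the values for each nonzero entry.
    dims, coords, values = TnsFileLoader().load("data/FROSTT/example.tns")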

download_suitesparse.sh

Lines changed: 2861 additions & 0 deletions
Large diffs are not rendered by default.

numpy/conftest.py

Lines changed: 1 addition & 3 deletions
@@ -3,9 +3,7 @@
 def tacoBench(benchmark):
     def f(func):
         # Take statistics based on 10 rounds.
-        benchmark.pedantic(func, rounds=10, iterations=5)
-        # How do i set please use 10 rounds...
-        # benchmark(func)
+        benchmark.pedantic(func, rounds=10, iterations=1, warmup_rounds=1)
     return f
 
 def pytest_addoption(parser):
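
With this change every benchmark performs one warmup round followed by 10 measured rounds of a single iteration each. A minimal sketch of how the fixture is consumed (bench_example and its body are hypothetical; the real benchmarks in numpy/ufuncs.py follow the same shape):

    def bench_example(tacoBench):
        def bench():
            return sum(range(1000))  # stand-in for the kernel being measured
        tacoBench(bench)  # 1 warmup round, then 10 rounds of 1 iteration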

numpy/ufuncs.py

Lines changed: 14 additions & 0 deletions
@@ -2,6 +2,7 @@
 from scipy.sparse import random, csr_matrix
 import sparse
 import pytest
+from util import TensorCollectionFROSTT, PydataTensorShifter
 
 # TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when
 # given this ufunc to evaluate.
@@ -85,3 +86,16 @@ def bench():
         return C
     tacoBench(bench)
     print("Result", bench())
+
+# Run benchmarks against the FROSTT collection.
+FROSTTTensors = TensorCollectionFROSTT()
+@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames())
+def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor):
+    frTensor = tensor.load()
+    shifter = PydataTensorShifter()
+    other = shifter.shiftLastMode(frTensor).astype('int64')
+    def bench():
+        # TODO (rohany): Expand this test beyond ldexp.
+        c = numpy.ldexp(frTensor, other)
+        return c
+    tacoBench(bench)
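
A hedged sketch of running just this new FROSTT benchmark through pytest's Python API, assuming the repository's pytest configuration collects the bench_* functions (equivalent to passing -k on the command line):

    import pytest

    # Every .tns file under data/FROSTT becomes one parametrized case, with ids
    # taken from getTensorNames(); -k narrows collection to the benchmark above.
    pytest.main(["numpy/ufuncs.py", "-k", "bench_pydata_frostt_ufunc_sparse"])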

numpy/util.py

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
+import scipy.sparse
+import sparse
+import os
+import glob
+
+# Get the path to the directory holding random tensors. Error out
+# if this isn't set.
+TENSOR_PATH = os.environ['TACO_TENSOR_PATH']
+
+# TnsFileLoader loads a tensor stored in .tns format.
+class TnsFileLoader:
+    def __init__(self):
+        pass
+
+    def load(self, path):
+        coordinates = []
+        values = []
+        dims = []
+        first = True
+        with open(path, 'r') as f:
+            for line in f:
+                data = line.split(' ')
+                if first:
+                    first = False
+                    dims = [0] * (len(data) - 1)
+                    for i in range(len(data) - 1):
+                        coordinates.append([])
+
+                for i in range(len(data) - 1):
+                    coordinates[i].append(int(data[i]) - 1)
+                    dims[i] = max(dims[i], coordinates[i][-1] + 1)
+                # TODO (rohany): What if we want this to be an integer?
+                values.append(float(data[-1]))
+        return dims, coordinates, values
+
+# TnsFileDumper dumps a dictionary of coordinates to values
+# into a coordinate list tensor file.
+class TnsFileDumper:
+    def __init__(self):
+        pass
+
+    def dump_dict_to_file(self, shape, data, path):
+        # Sort the data so that the output is deterministic.
+        sorted_data = sorted([list(coords) + [value] for coords, value in data.items()])
+        with open(path, 'w+') as f:
+            for line in sorted_data:
+                coords = [str(elem + 1) for elem in line[:len(line) - 1]]
+                strings = coords + [str(line[-1])]
+                f.write(" ".join(strings))
+                f.write("\n")
+
+# ScipySparseTensorLoader loads a sparse tensor from a file into a
+# scipy.sparse CSR matrix.
+class ScipySparseTensorLoader:
+    def __init__(self, format):
+        self.loader = TnsFileLoader()
+        self.format = format
+
+    def load(self, path):
+        dims, coords, values = self.loader.load(path)
+        if self.format == "csr":
+            return scipy.sparse.csr_matrix((values, (coords[0], coords[1])), shape=tuple(dims))
+        elif self.format == "csc":
+            return scipy.sparse.csc_matrix((values, (coords[0], coords[1])), shape=tuple(dims))
+        else:
+            assert(False)
+
+# PydataSparseTensorLoader loads a sparse tensor from a file into
+# a pydata.sparse tensor.
+class PydataSparseTensorLoader:
+    def __init__(self):
+        self.loader = TnsFileLoader()
+
+    def load(self, path):
+        dims, coords, values = self.loader.load(path)
+        return sparse.COO(coords, values, tuple(dims))
+
+# construct_random_tensor_key constructs a unique key that represents
+# a random tensor parameterized by the chosen shape and sparsity.
+# The key itself is formatted by the dimensions, followed by the
+# sparsity. For example, a 250 by 250 tensor with sparsity 0.01
+# would have a key of 250x250-0.01.tns.
+def construct_random_tensor_key(shape, sparsity):
+    path = TENSOR_PATH
+    dims = "x".join([str(dim) for dim in shape])
+    key = "{}-{}.tns".format(dims, sparsity)
+    return os.path.join(path, "random", key)
+
+# RandomPydataSparseTensorLoader should be used to generate
+# random pydata.sparse tensors. It caches the loaded tensors
+# in the file system so that TACO benchmarks using tensors
+# with the same parameters can use the exact same tensors.
+class RandomPydataSparseTensorLoader:
+    def __init__(self):
+        self.loader = PydataSparseTensorLoader()
+
+    def random(self, shape, sparsity):
+        key = construct_random_tensor_key(shape, sparsity)
+        # If a tensor with these properties exists already, then load it.
+        if os.path.exists(key):
+            return self.loader.load(key)
+        else:
+            # Otherwise, we must create a random tensor with the desired properties,
+            # dump it to the output file, then return it.
+            result = sparse.random(shape, density=sparsity)
+            dok = sparse.DOK(result)
+            TnsFileDumper().dump_dict_to_file(shape, dok.data, key)
+            return result
+
+# RandomScipySparseTensorLoader is the same as RandomPydataSparseTensorLoader
+# but for scipy.sparse tensors.
+class RandomScipySparseTensorLoader:
+    def __init__(self, format):
+        self.loader = ScipySparseTensorLoader(format)
+        self.format = format
+
+    def random(self, shape, sparsity):
+        assert(len(shape) == 2)
+        key = construct_random_tensor_key(shape, sparsity)
+        # If a tensor with these properties exists already, then load it.
+        if os.path.exists(key):
+            return self.loader.load(key)
+        else:
+            # Otherwise, create and then dump a tensor.
+            result = scipy.sparse.random(shape[0], shape[1], density=sparsity, format=self.format)
+            dok = scipy.sparse.dok_matrix(result)
+            TnsFileDumper().dump_dict_to_file(shape, dict(dok.items()), key)
+            return result
+
+# FROSTTTensor represents a tensor in the FROSTT dataset.
+class FROSTTTensor:
+    def __init__(self, path):
+        self.path = path
+
+    def __str__(self):
+        f = os.path.split(self.path)[1]
+        return f.replace(".tns", "")
+
+    def load(self):
+        return PydataSparseTensorLoader().load(self.path)
+
+# TensorCollectionFROSTT represents the set of all FROSTT tensors.
+class TensorCollectionFROSTT:
+    def __init__(self):
+        data = os.path.join(TENSOR_PATH, "FROSTT")
+        frostttensors = glob.glob(os.path.join(data, "*.tns"))
+        self.tensors = [FROSTTTensor(t) for t in frostttensors]
+
+    def getTensors(self):
+        return self.tensors
+    def getTensorNames(self):
+        return [str(tensor) for tensor in self.getTensors()]
+
+# PydataTensorShifter shifts all elements in the last mode
+# of the input pydata/sparse tensor by one.
+class PydataTensorShifter:
+    def __init__(self):
+        pass
+
+    def shiftLastMode(self, tensor):
+        coords = tensor.coords
+        data = tensor.data
+        resultCoords = []
+        for j in range(len(tensor.shape)):
+            resultCoords.append([0] * len(data))
+        resultValues = [0] * len(data)
+        for i in range(len(data)):
+            for j in range(len(tensor.shape)):
+                resultCoords[j][i] = coords[j][i]
+            resultValues[i] = data[i]
+            resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1]
+        return sparse.COO(resultCoords, resultValues, tensor.shape)
+
+# ScipyTensorShifter shifts all elements in the last mode
+# of the input scipy/sparse tensor by one.
+class ScipyTensorShifter:
+    def __init__(self, format):
+        self.format = format
+
+    def shiftLastMode(self, tensor):
+        dok = scipy.sparse.dok_matrix(tensor)
+        result = scipy.sparse.dok_matrix(tensor.shape)
+        for coord, val in dok.items():
+            newCoord = list(coord[:])
+            newCoord[-1] = (newCoord[-1] + 1) % tensor.shape[-1]
+            result[tuple(newCoord)] = val
+        if self.format == "csr":
+            return scipy.sparse.csr_matrix(result)
+        elif self.format == "csc":
+            return scipy.sparse.csc_matrix(result)
+        else:
+            assert(False)
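
A minimal usage sketch of how the helpers in util.py fit together, assuming TACO_TENSOR_PATH is set (e.g. to data/ via the Makefile) and the random/ subdirectory exists; the shape and sparsity values are illustrative:

    from util import RandomPydataSparseTensorLoader, PydataTensorShifter

    loader = RandomPydataSparseTensorLoader()
    # The first call generates and dumps data/random/250x250-0.01.tns; later calls
    # reload that file, so every benchmark sees the same random tensor.
    A = loader.random((250, 250), 0.01)

    # Shift each nonzero by one position in the last mode (mod the dimension) to
    # build a second operand with a related but different sparsity pattern.
    B = PydataTensorShifter().shiftLastMode(A)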
