
Commit eb737a7

Author: Jessica Shi (committed)
fix miscellaneous issues
1 parent 1e7c6c5 commit eb737a7

9 files changed: +101 -1321 lines changed

9 files changed

+101
-1321
lines changed

examples/spmv_assembly.c
Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 // Generated by the Tensor Algebra Compiler (tensor-compiler.org)
-// taco "y(i)=A(i,j)*x(j)" -f=y:d:0 -f=A:ds:0,1 -f=x:d:0 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
+// taco "y(i)=A(i,j)*x(j)" -f=y:d:0 -f=A:ds:0,1 -f=x:d:0 -s=split(i,i0,i1,32) -s=reorder(i0,i1,j) -s=parallelize(i0,CPUThread,NoRaces) -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
 
 int assemble(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) {
   int y1_dimension = (int)(y->dimensions[0]);
@@ -9,4 +9,4 @@ int assemble(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) {
 
   y->vals = (uint8_t*)y_vals;
   return 0;
-}
+}

examples/spmv_compute.c
Lines changed: 17 additions & 8 deletions

@@ -1,5 +1,5 @@
 // Generated by the Tensor Algebra Compiler (tensor-compiler.org)
-// taco "y(i)=A(i,j)*x(j)" -f=y:d:0 -f=A:ds:0,1 -f=x:d:0 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
+// taco "y(i)=A(i,j)*x(j)" -f=y:d:0 -f=A:ds:0,1 -f=x:d:0 -s=split(i,i0,i1,32) -s=reorder(i0,i1,j) -s=parallelize(i0,CPUThread,NoRaces) -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
 
 int compute(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) {
   int y1_dimension = (int)(y->dimensions[0]);
@@ -11,14 +11,23 @@ int compute(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) {
   int x1_dimension = (int)(x->dimensions[0]);
   double* restrict x_vals = (double*)(x->vals);
 
+  #pragma omp parallel for schedule(static)
+  for (int32_t py = 0; py < y1_dimension; py++) {
+    y_vals[py] = 0.0;
+  }
+
   #pragma omp parallel for schedule(runtime)
-  for (int32_t i = 0; i < A1_dimension; i++) {
-    double y_val = 0.0;
-    for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
-      int32_t j = A2_crd[jA];
-      y_val += A_vals[jA] * x_vals[j];
+  for (int32_t i0 = 0; i0 < ((A1_dimension + 31) / 32); i0++) {
+    for (int32_t i1 = 0; i1 < 32; i1++) {
+      int32_t i = i0 * 32 + i1;
+      if (i >= A1_dimension)
+        continue;
+
+      for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
+        int32_t j = A2_crd[jA];
+        y_vals[i] = y_vals[i] + A_vals[jA] * x_vals[j];
+      }
     }
-    y_vals[i] = y_val;
   }
   return 0;
-}
+}
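
The new compute kernel above replaces the per-row scalar accumulator with a tiled loop nest: rows are split into blocks of 32 (split(i,i0,i1,32)), the block loop i0 is parallelized across CPU threads, and a bounds guard skips rows past A1_dimension in the last partial block. Below is a minimal standalone sketch (not taco output) of that same loop structure; the 3x3 CSR matrix, vectors, and expected output are made-up example data.

// Standalone sketch of the tiled CSR SpMV loop generated by the new schedule.
// Compile with an OpenMP-capable compiler, e.g. cc -fopenmp spmv_sketch.c;
// without -fopenmp the pragmas are ignored and the loops run sequentially.
#include <stdint.h>
#include <stdio.h>

int main(void) {
  // Hypothetical 3x3 sparse matrix in CSR form:
  // [ 1 0 2 ]
  // [ 0 3 0 ]
  // [ 4 0 5 ]
  int32_t A1_dimension = 3;
  int32_t A2_pos[]  = {0, 2, 3, 5};
  int32_t A2_crd[]  = {0, 2, 1, 0, 2};
  double  A_vals[]  = {1.0, 2.0, 3.0, 4.0, 5.0};
  double  x_vals[]  = {1.0, 1.0, 1.0};
  double  y_vals[3] = {0.0, 0.0, 0.0};  // output is zero-initialized up front

  #pragma omp parallel for schedule(runtime)
  for (int32_t i0 = 0; i0 < ((A1_dimension + 31) / 32); i0++) {   // blocks of 32 rows
    for (int32_t i1 = 0; i1 < 32; i1++) {
      int32_t i = i0 * 32 + i1;
      if (i >= A1_dimension)
        continue;  // guard rows past the end of the last partial block
      for (int32_t jA = A2_pos[i]; jA < A2_pos[i + 1]; jA++) {
        y_vals[i] += A_vals[jA] * x_vals[A2_crd[jA]];
      }
    }
  }

  for (int32_t i = 0; i < A1_dimension; i++)
    printf("y[%d] = %g\n", i, y_vals[i]);  // expect 3, 3, 9
  return 0;
}

Each thread owns whole row blocks, so the writes to y_vals never race, which is what the NoRaces strategy in the taco command asserts.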

examples/spmv_full.c
Lines changed: 35 additions & 17 deletions

@@ -1,5 +1,5 @@
 // Generated by the Tensor Algebra Compiler (tensor-compiler.org)
-// taco "y(i)=A(i,j)*x(j)" -f=y:d:0 -f=A:ds:0,1 -f=x:d:0 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
+// taco "y(i)=A(i,j)*x(j)" -f=y:d:0 -f=A:ds:0,1 -f=x:d:0 -s=split(i,i0,i1,32) -s=reorder(i0,i1,j) -s=parallelize(i0,CPUThread,NoRaces) -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
 #ifndef TACO_C_HEADERS
 #define TACO_C_HEADERS
 #include <stdio.h>
@@ -118,14 +118,23 @@ int compute(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) {
   int x1_dimension = (int)(x->dimensions[0]);
   double* restrict x_vals = (double*)(x->vals);
 
+  #pragma omp parallel for schedule(static)
+  for (int32_t py = 0; py < y1_dimension; py++) {
+    y_vals[py] = 0.0;
+  }
+
   #pragma omp parallel for schedule(runtime)
-  for (int32_t i = 0; i < A1_dimension; i++) {
-    double y_val = 0.0;
-    for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
-      int32_t j = A2_crd[jA];
-      y_val += A_vals[jA] * x_vals[j];
+  for (int32_t i0 = 0; i0 < ((A1_dimension + 31) / 32); i0++) {
+    for (int32_t i1 = 0; i1 < 32; i1++) {
+      int32_t i = i0 * 32 + i1;
+      if (i >= A1_dimension)
+        continue;
+
+      for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
+        int32_t j = A2_crd[jA];
+        y_vals[i] = y_vals[i] + A_vals[jA] * x_vals[j];
+      }
     }
-    y_vals[i] = y_val;
   }
   return 0;
 }
@@ -153,14 +162,23 @@ int evaluate(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) {
   int32_t y_capacity = y1_dimension;
   y_vals = (double*)malloc(sizeof(double) * y_capacity);
 
+  #pragma omp parallel for schedule(static)
+  for (int32_t py = 0; py < y_capacity; py++) {
+    y_vals[py] = 0.0;
+  }
+
   #pragma omp parallel for schedule(runtime)
-  for (int32_t i = 0; i < A1_dimension; i++) {
-    double y_val = 0.0;
-    for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
-      int32_t j = A2_crd[jA];
-      y_val += A_vals[jA] * x_vals[j];
+  for (int32_t i0 = 0; i0 < ((A1_dimension + 31) / 32); i0++) {
+    for (int32_t i1 = 0; i1 < 32; i1++) {
+      int32_t i = i0 * 32 + i1;
+      if (i >= A1_dimension)
+        continue;
+
+      for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
+        int32_t j = A2_crd[jA];
+        y_vals[i] = y_vals[i] + A_vals[jA] * x_vals[j];
+      }
     }
-    y_vals[i] = y_val;
   }
 
   y->vals = (uint8_t*)y_vals;
@@ -218,12 +236,12 @@ int pack_A(taco_tensor_t *A, int* A_COO1_pos, int* A_COO1_crd, int* A_COO2_crd,
         jA_COO++;
       }
       if (A_capacity <= jA) {
-        A_vals = (double*)realloc(A_vals, sizeof(double) * (A_capacity * 2));
+        A_vals = (double*)realloc(A_vals, sizeof(double) * A_capacity * 2);
        A_capacity *= 2;
      }
      A_vals[jA] = A_COO_val;
      if (A2_crd_size <= jA) {
-        A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * (A2_crd_size * 2));
+        A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * A2_crd_size * 2);
        A2_crd_size *= 2;
      }
      A2_crd[jA] = j;
@@ -294,12 +312,12 @@ int unpack(int** y_COO1_pos_ptr, int** y_COO1_crd_ptr, double** y_COO_vals_ptr,
 
   for (int32_t i = 0; i < y1_dimension; i++) {
     if (y_COO_capacity <= iy_COO) {
-      y_COO_vals = (double*)realloc(y_COO_vals, sizeof(double) * (y_COO_capacity * 2));
+      y_COO_vals = (double*)realloc(y_COO_vals, sizeof(double) * y_COO_capacity * 2);
       y_COO_capacity *= 2;
     }
     y_COO_vals[iy_COO] = y_vals[i];
     if (y_COO1_crd_size <= iy_COO) {
-      y_COO1_crd = (int32_t*)realloc(y_COO1_crd, sizeof(int32_t) * (y_COO1_crd_size * 2));
+      y_COO1_crd = (int32_t*)realloc(y_COO1_crd, sizeof(int32_t) * y_COO1_crd_size * 2);
       y_COO1_crd_size *= 2;
     }
     y_COO1_crd[iy_COO] = i;
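
The pack_A and unpack hunks above only drop redundant parentheses around the capacity-doubling realloc calls; the underlying pattern is ordinary geometric growth of a dynamic buffer. A minimal standalone sketch of that pattern follows; the buffer name, sizes, and data are illustrative, not taken from the commit.

// Sketch of capacity-doubling appends, the same scheme pack_A/unpack use for
// A_vals, A2_crd, and the COO output buffers (error handling omitted for brevity).
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  int32_t capacity = 2;
  int32_t count = 0;
  double* vals = (double*)malloc(sizeof(double) * capacity);

  for (int32_t i = 0; i < 10; i++) {
    if (capacity <= count) {
      // grow to twice the current capacity before the write would overflow
      vals = (double*)realloc(vals, sizeof(double) * capacity * 2);
      capacity *= 2;
    }
    vals[count++] = (double)i;  // append
  }

  printf("stored %d values in a buffer of capacity %d\n", count, capacity);
  free(vals);
  return 0;
}

Doubling keeps the amortized cost of each append constant, which is why the generated packing code grows its buffers this way.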

javascripts/default-schedules.js
Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ var default_GPU_schedules = {
   },
   {
     command: "split",
-    parameters: ["fpos1", "warp", "fpos2", 216]
+    parameters: ["fpos1", "warp", "fpos2", 256]
   },
   {
     command: "split",

javascripts/demo.js
Lines changed: 29 additions & 9 deletions

@@ -5,7 +5,6 @@ function demo() {
       tensorOrders: {},
       error: "",
       indices: [],
-      accesses: []
     },
     schedule: [],
     output: {
@@ -31,7 +30,9 @@ function demo() {
       }
     },
     updateScheduleView: function() {
+      console.log(model.schedule);
       model.removeInvalidIndices();
+      model.removeInvalidAccesses();
       model.scheduleView(0);
     },
     addOutputView: function(newView) {
@@ -65,7 +66,6 @@ function demo() {
       try {
         model.input.tensorOrders = parser.parse(expression);
         model.input.indices = [...new Set(parser_indices.parse(expression))];
-        model.input.accesses = [...new Set(parser_accesses.parse(expression))];
         model.input.error = "";
         for (t in model.input.tensorOrders) {
           if (model.input.tensorOrders[t] < 0) {
@@ -126,7 +126,7 @@ function demo() {
     },
     resetSchedule: function() {
       model.schedule = [];
-      model.updateScheduleView();
+      model.updateScheduleView();
     },
     addScheduleRow: function() {
       model.schedule.push({command: "", parameters: []});
@@ -209,7 +209,24 @@ function demo() {
         }
       }
     },
+    removeInvalidAccesses: function() {
+      for (var row = 0; row < model.schedule.length; ++row) {
+        for (var index = 0; index < model.schedule[row]["parameters"].length; ++index) {
+          var command = model.schedule[row]["command"];
+          var value = model.schedule[row]["parameters"][index];
+          if (model.isParameterType(command, index, "access dropdown")
+              && !model.input.tensorOrders.hasOwnProperty(value)) {
+            model.schedule[row]["parameters"][index] = "";
+            model.updateInferred(row, command, index, "");
+          }
+        }
+      }
+    },
     isParameterType: function(command, index, parameterType) {
+      if (command === "reorder") {
+        return parameterType === "index dropdown";
+      }
+
       return scheduleCommands[command][index] && scheduleCommands[command][index][0] === parameterType;
     },
     updateInferred: function(row, command, index, value) {
@@ -352,7 +369,7 @@ function demo() {
       var hideTables = function() {
         $("#tblFormats").hide();
         $("#tblSchedule").hide();
-        tblScheduleView.clear();
+        model.resetSchedule()
       };
       tblFormatsView.timerEvent = setTimeout(hideTables, timeout);
     } else {
@@ -698,10 +715,13 @@ function demo() {
   // a dropdown where user can choose from argument tensors
   function accessDropdown(parameterName, inputId, input) {
     var parameter = dropdown(parameterName, inputId, input);
-    for (var access of model.input.accesses) {
-      parameter += "<li><a>";
-      parameter += access;
-      parameter += "</a></li>";
+    for (var access in model.input.tensorOrders) {
+      if (model.input.tensorOrders[access] > 0
+          && model.input.expression.indexOf(access) > model.input.expression.indexOf("=")) {
+        parameter += "<li><a>";
+        parameter += access;
+        parameter += "</a></li>";
+      }
     }
     parameter += "</ul></div></li>";
     return parameter;
@@ -907,7 +927,7 @@ function demo() {
 
     $("#txtExpr").keyup(function() {
       model.setInput($("#txtExpr").val());
-      model.resetSchedule();
+      model.updateScheduleView();
     });
 
     var panelKernelsView = {

0 commit comments