@@ -264,7 +264,7 @@ module Expand =
264
264
if length = 0 then
265
265
segmentPointers.Free processor
266
266
267
- None
267
+ length , None
268
268
else
269
269
// expand
270
270
let leftMatrixValues , rightMatrixValues , columns , rows =
@@ -281,26 +281,28 @@ module Expand =
281
281
columns.Free processor
282
282
rows.Free processor
283
283
284
- mulResult
285
- |> Option.bind
286
- ( fun ( resultValues , resultColumns , resultRows ) ->
287
- // sort
288
- let sortedValues , sortedColumns , sortedRows =
289
- sort processor resultValues resultColumns resultRows
284
+ let result =
285
+ mulResult
286
+ |> Option.bind
287
+ ( fun ( resultValues , resultColumns , resultRows ) ->
288
+ // sort
289
+ let sortedValues , sortedColumns , sortedRows =
290
+ sort processor resultValues resultColumns resultRows
290
291
291
- resultValues.Free processor
292
- resultColumns.Free processor
293
- resultRows.Free processor
292
+ resultValues.Free processor
293
+ resultColumns.Free processor
294
+ resultRows.Free processor
294
295
295
- // addition
296
- let reduceResult =
297
- reduce processor allocationMode sortedValues sortedColumns sortedRows
296
+ // addition
297
+ let reduceResult =
298
+ reduce processor allocationMode sortedValues sortedColumns sortedRows
298
299
299
- sortedValues.Free processor
300
- sortedColumns.Free processor
301
- sortedRows.Free processor
300
+ sortedValues.Free processor
301
+ sortedColumns.Free processor
302
+ sortedRows.Free processor
302
303
303
- reduceResult)
304
+ reduceResult)
305
+ length, result
304
306
305
307
let runOneStep opAdd opMul ( clContext : ClContext ) workGroupSize =
306
308
@@ -323,7 +325,7 @@ module Expand =
323
325
Columns = leftMatrix.Columns
324
326
Values = leftMatrix.Values }
325
327
326
- let result =
328
+ let _ , result =
327
329
runCOO processor allocationMode rightMatrixRowsNNZ rightMatrix leftMatrixCOO
328
330
329
331
rows.Free processor
@@ -343,7 +345,7 @@ module Expand =
343
345
let gather = Gather.run clContext workGroupSize
344
346
345
347
let upperBound =
346
- ClArray.upperBoundAndValue clContext workGroupSize
348
+ ClArray.upperBound clContext workGroupSize
347
349
348
350
let set = ClArray.set clContext workGroupSize
349
351
@@ -378,9 +380,13 @@ module Expand =
378
380
clContext.CreateClCell( workOffset + maxAllocSize: int)
379
381
380
382
// find the largest row that fits into maxAllocSize
381
- let endRow , value =
383
+ let upperBound =
382
384
( upperBound currentBound) .ToHostAndFree processor
383
385
386
+ let endRow = upperBound - 2
387
+
388
+ currentBound.Free processor
389
+
384
390
// TODO(handle largest rows)
385
391
// (we could split such a row, multiply the parts and merge them, but that requires a merge-path implementation)
386
392
if endRow = beginRow then
@@ -389,12 +395,15 @@ module Expand =
389
395
// extract matrix TODO(Transfer overhead)
390
396
let subMatrix =
391
397
subMatrix beginRow ( endRow - beginRow) leftMatrix
398
+
392
399
// compute sub result
393
- let result = runCOO subMatrix
400
+ let length , result = runCOO subMatrix
401
+ // advance workOffset by the length returned from the previous expand step
402
+ let workOffset = workOffset + length
394
403
395
404
match result with
396
- | Some result -> helper endRow value <| result :: previousResult
397
- | None -> helper endRow value previousResult
405
+ | Some result -> helper endRow workOffset <| result :: previousResult
406
+ | None -> helper endRow workOffset previousResult
398
407
else
399
408
previousResult
400
409
0 commit comments