Skip to content

Commit 84a22a5

Browse files
committed
Static 0.4
1 parent c64bdb2 commit 84a22a5

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "TriangularSolve"
22
uuid = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf"
33
authors = ["chriselrod <[email protected]> and contributors"]
4-
version = "0.1.6"
4+
version = "0.1.7"
55

66
[deps]
77
CloseOpenIntervals = "fb6a15b2-703c-40df-9091-08a04967cfa9"
@@ -19,7 +19,7 @@ IfElse = "0.1"
1919
LayoutPointers = "0.1.2"
2020
LoopVectorization = "0.12.30"
2121
Polyester = "0.4, 0.5"
22-
Static = "0.2, 0.3"
22+
Static = "0.2, 0.3, 0.4"
2323
VectorizationBase = "0.21"
2424
julia = "1.5"
2525

src/TriangularSolve.jl

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ end
139139
end
140140
@inline store_small_kern!(spa, ::Nothing, v, spu, i, n, ::Val{false}) = vstore!(spa, v / vload(spu, (n,n)), i)
141141

142-
function BdivU_small_kern!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, mask::AbstractMask{W}, ::Val{UNIT}) where {T,UNIT,W}
142+
@inline function BdivU_small_kern!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, mask::AbstractMask{W}, ::Val{UNIT}) where {T,UNIT,W}
143143
# W = VectorizationBase.pick_vector_width(T)
144144
for n ∈ CloseOpen(N)
145145
Amn = vload(spb, (MM{W}(StaticInt(0)),n), mask)
@@ -149,7 +149,7 @@ function BdivU_small_kern!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStri
149149
store_small_kern!(spa, sp, Amn, spu, (MM{W}(StaticInt(0)),n), n, mask, Val{UNIT}())
150150
end
151151
end
152-
function BdivU_small_kern_u!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, ::StaticInt{U}, ::Val{UNIT}) where {T,U,UNIT}
152+
@inline function BdivU_small_kern_u!(spa::AbstractStridedPointer{T}, sp, spb::AbstractStridedPointer{T}, spu::AbstractStridedPointer{T}, N, ::StaticInt{U}, ::Val{UNIT}) where {T,U,UNIT}
153153
W = Int(VectorizationBase.pick_vector_width(T))
154154
for n ∈ CloseOpen(N)
155155
Amn = vload(spb, Unroll{1,W,U,1,W,0x0000000000000000,1}((StaticInt(0),n)))
@@ -203,7 +203,7 @@ end
203203

204204
@generated function rdiv_solve_W_u!(spc, spb, spa, spu, n, ::StaticInt{W}, ::StaticInt{U}, ::Val{UNIT}) where {W, U, UNIT}
205205
quote
206-
# $(Expr(:meta,:inline))
206+
$(Expr(:meta,:inline))
207207
# here, we just want to load the vectors
208208
C11 = VectorizationBase.data(vload(spa, Unroll{2,1,$W,1,$W,0x0000000000000000,1}(Unroll{1,$W,$U,1,$W,0x0000000000000000,1}((StaticInt(0),n)))))
209209
Base.Cartesian.@nexprs $W c -> C11_c = C11[c]
@@ -224,7 +224,7 @@ end
224224
:(vstore!(spc, C11, i, mask))
225225
end
226226
quote
227-
# $(Expr(:meta,:inline))
227+
$(Expr(:meta,:inline))
228228
# here, we just want to load the vectors
229229
C11 = VectorizationBase.data(vload(spa, Unroll{2,1,$W,1,$W,0xffffffffffffffff,1}((StaticInt(0),n)), mask))
230230
Base.Cartesian.@nexprs $W c -> C11_c = C11[c]
@@ -240,7 +240,7 @@ end
240240
end
241241
end
242242

243-
function rdiv_U!(spc::AbstractStridedPointer{T}, spa::AbstractStridedPointer, spu::AbstractStridedPointer, M, N, ::StaticInt{1}, ::Val{UNIT}) where {T,UNIT}
243+
@inline function rdiv_U!(spc::AbstractStridedPointer{T}, spa::AbstractStridedPointer, spu::AbstractStridedPointer, M, N, ::StaticInt{1}, ::Val{UNIT}) where {T,UNIT}
244244
WS = pick_vector_width(T)
245245
W = Int(WS)
246246
UF = unroll_factor(WS)
@@ -381,7 +381,6 @@ function rdiv_block_N!(
381381
# println("Solve with N_temp = $N_temp and n = $n")
382382
rdiv_U!(spc, spa_rdiv, gesp(spu, (n,StaticInt{0}())), M, N_temp, StaticInt{X}(), Val(UNIT))
383383
repeat || break
384-
385384
spa = gesp(spa, (StaticInt(0), B_normalized))
386385
spc = gesp(spc, (StaticInt(0), B_normalized))
387386
spu = gesp(spu, (StaticInt(0), B_normalized))
@@ -439,20 +438,21 @@ end
439438
function multithread_rdiv!(
440439
spc::AbstractStridedPointer{T}, spa, spu, M, N, mtb, ::Val{UNIT}, ::StaticInt{X}
441440
) where {X,T,UNIT}
441+
mtb = 8
442442
(Md, Mr) = VectorizationBase.vdivrem(M, mtb)
443443
Nblock = Md + (Mr ≠ 0)
444444
Mrem = Core.ifelse(Mr ≠ 0, Mr, mtb)
445445
# @show mtb, Nblock, Mrem, Md, Mr
446446
# return
447-
let (Md, Mr) = VectorizationBase.vdivrem(M, mtb), Nblock = Md + (Mr ≠ 0), Mrem = Core.ifelse(Mr ≠ 0, Mr, mtb)
447+
let Md = Md, Mr = Mr, Nblock = Md + (Mr ≠ 0), Mrem = Core.ifelse(Mr ≠ 0, Mr, mtb), VUNIT = Val{UNIT}(), StaticX = StaticInt{X}()
448448
@batch for block in CloseOpen(Nblock)
449-
# let block = 0
450-
Mtemp = Core.ifelse(block == Nblock-1, Mrem, mtb)
449+
# for block in CloseOpen(Nblock)
450+
# let block = 0
451451
rdiv_block_MandN!(
452-
# rdiv_block_N!(
452+
# rdiv_block_N!(
453453
gesp(spc, (mtb*block, StaticInt{0}())),
454454
gesp(spa, (mtb*block, StaticInt{0}())),
455-
spu, Mtemp, N, Val{UNIT}(), StaticInt{X}()
455+
spu, Core.ifelse(block == Nblock-1, Mrem, mtb), N, VUNIT, StaticX
456456
# spu, M, N, Val{UNIT}(), StaticInt{X}()
457457
)
458458
end

0 commit comments

Comments
 (0)