Skip to content

Commit fc6353b

Browse files
Merge pull request #6 from SciML/sorted
Sorted
2 parents 9d539e6 + 591b366 commit fc6353b

File tree

3 files changed

+147
-94
lines changed

3 files changed

+147
-94
lines changed

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,25 @@ searchsortedfirstcorrelated(v::AbstractVector{T}, x, guess::T)
4646

4747
An accelerated `findfirst` on sorted vectors using a bracketed search. Requires a `guess`
4848
to start the search from.
49+
50+
51+
Some benchmarks:
52+
```julia
53+
julia> x = rand(Int, 2048); s = sort(x);
54+
55+
julia> @btime findfirst(==($x[1011]), $x)
56+
266.427 ns (0 allocations: 0 bytes)
57+
1011
58+
59+
julia> @btime FindFirstFunctions.findfirstequal($x[1011], $x)
60+
67.502 ns (0 allocations: 0 bytes)
61+
1011
62+
63+
julia> @btime searchsortedfirst($s, $s[1011])
64+
8.897 ns (0 allocations: 0 bytes)
65+
1011
66+
67+
julia> @btime FindFirstFunctions.findfirstsortedequal($s[1011], $s)
68+
10.896 ns (0 allocations: 0 bytes)
69+
1011
70+
```

src/FindFirstFunctions.jl

Lines changed: 119 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,69 @@
11
module FindFirstFunctions
22

3+
"""
    _findfirstequal(vpivot::Int64, ptr::Ptr{Int64}, len::Int64) -> Int64

Scan the `len` consecutive `Int64` values starting at `ptr` and return the
zero-based offset of the first one equal to `vpivot`, or `-1` if none matches.

The embedded LLVM IR compares eight values per iteration with a vector `icmp`
while at least eight remain, then finishes the remainder one element at a time.
The function only sees a raw pointer, so the caller is responsible for keeping
the underlying memory alive (e.g. with `GC.@preserve`) for the whole call.
"""
function _findfirstequal(vpivot::Int64, ptr::Ptr{Int64}, len::Int64)
    return Base.llvmcall(
        ("""
declare i8 @llvm.cttz.i8(i8, i1);
define i64 @entry(i64 %0, i64 %1, i64 %2) #0 {
top:
%ivars = inttoptr i64 %1 to i64*
%btmp = insertelement <8 x i64> undef, i64 %0, i64 0
%var = shufflevector <8 x i64> %btmp, <8 x i64> undef, <8 x i32> zeroinitializer
%lenm7 = add nsw i64 %2, -7
%dosimditer = icmp ugt i64 %2, 7
br i1 %dosimditer, label %L9.lr.ph, label %L32

L9.lr.ph:
%len8 = and i64 %2, 9223372036854775800
br label %L9

L9:
%i = phi i64 [ 0, %L9.lr.ph ], [ %vinc, %L30 ]
%ivarsi = getelementptr inbounds i64, i64* %ivars, i64 %i
%vpvi = bitcast i64* %ivarsi to <8 x i64>*
%v = load <8 x i64>, <8 x i64>* %vpvi, align 8
%m = icmp eq <8 x i64> %v, %var
%mu = bitcast <8 x i1> %m to i8
%matchnotfound = icmp eq i8 %mu, 0
br i1 %matchnotfound, label %L30, label %L17

L17:
%tz8 = call i8 @llvm.cttz.i8(i8 %mu, i1 true)
%tz64 = zext i8 %tz8 to i64
%vis = add nuw i64 %i, %tz64
br label %common.ret

common.ret:
%retval = phi i64 [ %vis, %L17 ], [ -1, %L32 ], [ %si, %L51 ], [ -1, %L67 ]
ret i64 %retval

L30:
%vinc = add nuw nsw i64 %i, 8
%continue = icmp slt i64 %vinc, %lenm7
br i1 %continue, label %L9, label %L32

L32:
%cumi = phi i64 [ 0, %top ], [ %len8, %L30 ]
%done = icmp eq i64 %cumi, %2
br i1 %done, label %common.ret, label %L51

L51:
%si = phi i64 [ %inc, %L67 ], [ %cumi, %L32 ]
%spi = getelementptr inbounds i64, i64* %ivars, i64 %si
%svi = load i64, i64* %spi, align 8
%match = icmp eq i64 %svi, %0
br i1 %match, label %common.ret, label %L67

L67:
%inc = add i64 %si, 1
%dobreak = icmp eq i64 %inc, %2
br i1 %dobreak, label %common.ret, label %L51

}
attributes #0 = { alwaysinline }
""", "entry"),
        Int64,                          # return type
        Tuple{Int64,Ptr{Int64},Int64},  # argument types
        vpivot, ptr, len)
end
66+
367
"""
468
findfirstequal(x::Int64,A::DenseVector{Int64})
569
@@ -8,71 +72,33 @@ Finds the first value in `A` equal to `x`
872
function findfirstequal(vpivot, ivars)
    # Generic fallback: defer to Base's `findfirst` with an `isequal` predicate.
    return findfirst(isequal(vpivot), ivars)
end

function findfirstequal(vpivot::Int64, ivars::DenseVector{Int64})
    # `_findfirstequal` reads through a raw pointer, so keep `ivars` rooted for
    # the duration of the call.
    zero_based = GC.@preserve ivars _findfirstequal(vpivot, pointer(ivars), length(ivars))
    # The kernel reports a zero-based offset, or a negative value when absent.
    zero_based < 0 && return nothing
    return zero_based + 1
end
7579

80+
"""
    findfirstsortedequal(var::Int64, vars::DenseVector{Int64}, ::Val{basecase}=Val(16))::Union{Int64,Nothing}

Return the index of the first element of `vars` equal to `var`, or `nothing` if
`var` does not occur. `vars` must be sorted in ascending order.

The search window is halved (using `ifelse` rather than a branch) until it holds
at most `basecase` elements; that remainder is scanned with `_findfirstequal`.
`basecase` must be at least 1.

Note that this differs from `searchsortedfirst` by returning `nothing` when absent.
"""
function findfirstsortedequal(var::Int64, vars::DenseVector{Int64},
        ::Val{basecase}=Val(16)) where {basecase}
    len = length(vars)
    offset = 0
    # Invariant: if `var` occurs in `vars`, its FIRST occurrence lies in
    # `vars[offset+1:offset+len]`.
    @inbounds while len > basecase
        half = len >>> 1 # half on left, len - half on right
        # Probe the last element of the left half with a strict `<`: only when
        # everything up to the probe is smaller than `var` may the left half be
        # dropped. Probing with `<=` would step past earlier duplicates and
        # return a match that is not the first one.
        offset = ifelse(vars[offset+half] < var, offset + half, offset)
        len -= half
    end
    # maybe occurs in vars[offset+1:offset+len]
    GC.@preserve vars begin
        ret = _findfirstequal(var, pointer(vars) + sizeof(Int64) * offset, len)
    end
    # `ret` is a zero-based offset within the window, negative when absent.
    return ret < 0 ? nothing : ret + offset + 1
end
100+
101+
76102
"""
77103
bracketstrictlymontonic(v, x, guess; lt=<comparison>, by=<transform>, rev=false)
78104
@@ -94,36 +120,36 @@ this function would be the index returned by the previous call to `searchsorted`
94120
See `Base.sort!` for an explanation of the keyword arguments `by`, `lt` and `rev`.
95121
"""
96122
function bracketstrictlymontonic(v::AbstractVector,
        x,
        guess::T,
        o::Base.Order.Ordering)::NTuple{2,keytype(v)} where {T<:Integer}
    first_idx = firstindex(v)
    last_idx = lastindex(v)

    # A guess outside the valid index range carries no information: bracket
    # the whole vector.
    if guess < first_idx || guess > last_idx
        return first_idx, last_idx
    end

    # NOTE: for cache efficiency in repeated calls, the first and last elements
    # of `v` are deliberately not inspected here. This should only cost time for
    # calls with out-of-bounds values of `x` *and* bad `guess`es.

    step = T(1)
    lo = guess
    hi = min(guess + step, last_idx)
    @inbounds if Base.Order.lt(o, x, v[lo])
        # `x` sorts before `v[guess]`: walk left, doubling the stride each time.
        while lo > first_idx && Base.Order.lt(o, x, v[lo])
            hi = lo
            lo = max(first_idx, lo - step)
            step += step
        end
    else
        # `x` does not sort before `v[guess]`: walk right, doubling the stride.
        while hi < last_idx && !Base.Order.lt(o, x, v[hi])
            lo = hi
            hi = min(last_idx, hi + step)
            step += step
        end
    end
    return lo, hi
end
128154

129155
"""
@@ -133,13 +159,13 @@ An accelerated `findfirst` on sorted vectors using a bracketed search. Requires
133159
to start the search from.
134160
"""
135161
function searchsortedfirstcorrelated(v::AbstractVector, x, guess)
    fwd = Base.Order.Forward
    # Narrow the search to a bracket around `guess` first, then run the
    # ordinary sorted search inside that bracket only.
    window = bracketstrictlymontonic(v, x, guess, fwd)
    return searchsortedfirst(v, x, window[1], window[2], fwd)
end
139165

140166
"""
    searchsortedlastcorrelated(v::AbstractVector, x, guess)

Counterpart of `searchsortedfirstcorrelated` built on `searchsortedlast`: bracket
`x` around the index `guess` with `bracketstrictlymontonic` (forward ordering),
then run `searchsortedlast` restricted to that bracket.
"""
function searchsortedlastcorrelated(v::AbstractVector, x, guess)
    fwd = Base.Order.Forward
    window = bracketstrictlymontonic(v, x, guess, fwd)
    return searchsortedlast(v, x, window[1], window[2], fwd)
end
144170

145171
function searchsortedfirstcorrelated(r::AbstractRange, x, _)
    # For ranges the guess (third argument) is ignored and the call is
    # forwarded straight to `searchsortedfirst`.
    return searchsortedfirst(r, x)
end

test/runtests.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,21 @@ using Test
55

66
# Exercise every length from empty up to 128 so that both the 8-wide vector
# loop and the scalar tail of the kernel are covered, as well as the
# binary-search path of `findfirstsortedequal` (taken for lengths above 16).
for n = 0:128
    x = unique!(rand(Int, n))
    s = sort(x)
    for i = eachindex(x)
        @test FindFirstFunctions.findfirstequal(x[i], x) == i
        @test FindFirstFunctions.findfirstequal(s[i], s) == i
        @test FindFirstFunctions.findfirstsortedequal(s[i], s) == i
    end
    if length(x) > 0
        # Views are not `DenseVector`s, so these hit the generic fallback.
        @test FindFirstFunctions.findfirstequal(x[begin], @view(x[begin:end])) === 1
        @test FindFirstFunctions.findfirstequal(x[begin], @view(x[begin+1:end])) === nothing
        @test FindFirstFunctions.findfirstequal(x[end], @view(x[begin:end-1])) === nothing
    end
    y = rand(Int)
    ff = findfirst(==(y), x)
    @test FindFirstFunctions.findfirstequal(y, x) === ff
    # `findfirstsortedequal` requires sorted input, so query `s` (not the
    # unsorted `x`) and compare against `findfirst` whether or not `y` occurs.
    @test FindFirstFunctions.findfirstsortedequal(y, s) === findfirst(==(y), s)
end
1924

2025
end

0 commit comments

Comments
 (0)