|
module FindFirstFunctions

"""
    _findfirstequal(vpivot::Int64, ptr::Ptr{Int64}, len::Int64) -> Int64

Scan the `len` consecutive `Int64` values starting at `ptr` for the first one equal
to `vpivot`.

Returns the **zero-based** offset of the first match, or `-1` when no element matches
(including when `len == 0`).

The kernel is hand-written LLVM IR: while at least eight elements remain it compares
eight values at a time with `<8 x i64>` vector operations, then finishes the remaining
tail one element at a time.

The caller is responsible for keeping the memory behind `ptr` alive (e.g. with
`GC.@preserve`) and for making sure `len` elements are readable.
"""
function _findfirstequal(vpivot::Int64, ptr::Ptr{Int64}, len::Int64)
    return Base.llvmcall(
        (
            """
            declare i8 @llvm.cttz.i8(i8, i1);
            define i64 @entry(i64 %0, i64 %1, i64 %2) #0 {
            top:
              %ivars = inttoptr i64 %1 to i64*
              %btmp = insertelement <8 x i64> undef, i64 %0, i64 0
              %var = shufflevector <8 x i64> %btmp, <8 x i64> undef, <8 x i32> zeroinitializer
              %lenm7 = add nsw i64 %2, -7
              %dosimditer = icmp ugt i64 %2, 7
              br i1 %dosimditer, label %L9.lr.ph, label %L32

            L9.lr.ph:
              %len8 = and i64 %2, 9223372036854775800
              br label %L9

            L9:
              %i = phi i64 [ 0, %L9.lr.ph ], [ %vinc, %L30 ]
              %ivarsi = getelementptr inbounds i64, i64* %ivars, i64 %i
              %vpvi = bitcast i64* %ivarsi to <8 x i64>*
              %v = load <8 x i64>, <8 x i64>* %vpvi, align 8
              %m = icmp eq <8 x i64> %v, %var
              %mu = bitcast <8 x i1> %m to i8
              %matchnotfound = icmp eq i8 %mu, 0
              br i1 %matchnotfound, label %L30, label %L17

            L17:
              %tz8 = call i8 @llvm.cttz.i8(i8 %mu, i1 true)
              %tz64 = zext i8 %tz8 to i64
              %vis = add nuw i64 %i, %tz64
              br label %common.ret

            common.ret:
              %retval = phi i64 [ %vis, %L17 ], [ -1, %L32 ], [ %si, %L51 ], [ -1, %L67 ]
              ret i64 %retval

            L30:
              %vinc = add nuw nsw i64 %i, 8
              %continue = icmp slt i64 %vinc, %lenm7
              br i1 %continue, label %L9, label %L32

            L32:
              %cumi = phi i64 [ 0, %top ], [ %len8, %L30 ]
              %done = icmp eq i64 %cumi, %2
              br i1 %done, label %common.ret, label %L51

            L51:
              %si = phi i64 [ %inc, %L67 ], [ %cumi, %L32 ]
              %spi = getelementptr inbounds i64, i64* %ivars, i64 %si
              %svi = load i64, i64* %spi, align 8
              %match = icmp eq i64 %svi, %0
              br i1 %match, label %common.ret, label %L67

            L67:
              %inc = add i64 %si, 1
              %dobreak = icmp eq i64 %inc, %2
              br i1 %dobreak, label %common.ret, label %L51

            }
            attributes #0 = { alwaysinline }
            """,
            "entry",
        ),
        Int64,
        Tuple{Int64,Ptr{Int64},Int64},
        vpivot,
        ptr,
        len,
    )
end

"""
    findfirstequal(vpivot, ivars)

Return the index of the first element of `ivars` that `isequal` to `vpivot`, or
`nothing` if there is none.

This generic method forwards to `findfirst(isequal(vpivot), ivars)`. A specialised
method for `Int64` pivots in a `DenseVector{Int64}` uses a vectorised kernel instead.
"""
findfirstequal(vpivot, ivars) = findfirst(isequal(vpivot), ivars)

function findfirstequal(vpivot::Int64, ivars::DenseVector{Int64})
    # The kernel only sees a raw pointer, so `ivars` must be kept rooted for the call.
    ret = GC.@preserve ivars _findfirstequal(vpivot, pointer(ivars), length(ivars))
    # The kernel reports a zero-based offset, or a negative value for "not found".
    return ret < 0 ? nothing : ret + 1
end

"""
    findfirstsortedequal(var::Int64, vars::DenseVector{Int64}, ::Val{basecase}=Val(64))::Union{Int64,Nothing}

Return the index of the first element of the sorted vector `vars` that equals `var`,
or `nothing` if `var` does not occur.

The search bisects `vars` until at most `basecase` candidates remain and then scans
that window linearly. `vars` must be sorted in ascending order; the result is
unspecified otherwise. Values of `basecase` below `1` are treated as `1`.

Note that this differs from `searchsortedfirst` by returning `nothing` when absent.
"""
function findfirstsortedequal(
    var::Int64, vars::DenseVector{Int64}, ::Val{basecase}=Val(64)
) where {basecase}
    len = length(vars)
    offset = 0
    # Bisection needs `len >= 2` to make progress (`half >= 1`), so never go below 1.
    threshold = max(basecase, 1)
    # Invariant: the first occurrence of `var`, if any, lies in
    # `vars[offset+1:offset+len]`.
    @inbounds while len > threshold
        half = len >>> 1 # half on left, len - half on right
        # Probe the last element of the left half with a strict `<`: on equality we
        # stay left, so the first of a run of duplicates is never skipped.
        offset = ifelse(vars[offset + half] < var, half + offset, offset)
        # `len - half >= half`, so the shrunken window covers either half.
        len = len - half
    end
    # maybe occurs in vars[offset+1:offset+len]
    ret = GC.@preserve vars begin
        _findfirstequal(var, pointer(vars) + sizeof(Int64) * offset, len)
    end
    return ret < 0 ? nothing : ret + offset + 1
end

end
|
0 commit comments