Skip to content

Commit 53a1700

Browse files
committed
[AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases
Pre-committing tests to show improvements in a follow-up PR.
1 parent 40319e7 commit 53a1700

File tree

2 files changed

+269
-0
lines changed

2 files changed

+269
-0
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s
4+
5+
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
6+
7+
define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
8+
; GFX6_PTRADD-LABEL: v_add_i32:
9+
; GFX6_PTRADD: ; %bb.0:
10+
; GFX6_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
11+
; GFX6_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12+
; GFX6_PTRADD-NEXT: s_mov_b32 s7, 0x100f000
13+
; GFX6_PTRADD-NEXT: s_mov_b32 s10, 0
14+
; GFX6_PTRADD-NEXT: s_mov_b32 s11, s7
15+
; GFX6_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
16+
; GFX6_PTRADD-NEXT: v_mov_b32_e32 v1, s3
17+
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, s2, v0
18+
; GFX6_PTRADD-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
19+
; GFX6_PTRADD-NEXT: s_mov_b32 s8, s10
20+
; GFX6_PTRADD-NEXT: s_mov_b32 s9, s10
21+
; GFX6_PTRADD-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
22+
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
23+
; GFX6_PTRADD-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
24+
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
25+
; GFX6_PTRADD-NEXT: s_mov_b32 s6, -1
26+
; GFX6_PTRADD-NEXT: s_mov_b32 s4, s0
27+
; GFX6_PTRADD-NEXT: s_mov_b32 s5, s1
28+
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, v2, v0
29+
; GFX6_PTRADD-NEXT: buffer_store_dword v0, off, s[4:7], 0
30+
; GFX6_PTRADD-NEXT: s_endpgm
31+
;
32+
; GFX6_LEGACY-LABEL: v_add_i32:
33+
; GFX6_LEGACY: ; %bb.0:
34+
; GFX6_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
35+
; GFX6_LEGACY-NEXT: s_mov_b32 s7, 0x100f000
36+
; GFX6_LEGACY-NEXT: s_mov_b32 s10, 0
37+
; GFX6_LEGACY-NEXT: s_mov_b32 s11, s7
38+
; GFX6_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
39+
; GFX6_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
40+
; GFX6_LEGACY-NEXT: s_mov_b64 s[8:9], s[2:3]
41+
; GFX6_LEGACY-NEXT: v_mov_b32_e32 v1, 0
42+
; GFX6_LEGACY-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
43+
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
44+
; GFX6_LEGACY-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
45+
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
46+
; GFX6_LEGACY-NEXT: s_mov_b32 s6, -1
47+
; GFX6_LEGACY-NEXT: s_mov_b32 s4, s0
48+
; GFX6_LEGACY-NEXT: s_mov_b32 s5, s1
49+
; GFX6_LEGACY-NEXT: v_add_i32_e32 v0, vcc, v2, v0
50+
; GFX6_LEGACY-NEXT: buffer_store_dword v0, off, s[4:7], 0
51+
; GFX6_LEGACY-NEXT: s_endpgm
52+
; Loads two adjacent i32 values from %in, indexed by the workitem id, adds
; them and stores the sum to %out.
; Difference between the two check blocks above - the PTRADD checks add the
; pointer held in s[2:3] to the scaled workitem id in VGPRs
; (v_add_i32/v_addc_u32) and set s[8:9] to zero, while the LEGACY checks copy
; s[2:3] into s[8:9] and keep only the scaled workitem id in v[0:1].
%tid = call i32 @llvm.amdgcn.workitem.id.x()
53+
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
54+
; One i32 element (4 bytes) past %gep; both check blocks show this as an
; immediate offset of 4 on the second buffer_load_dword.
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
55+
; Both loads are volatile, so the optimizer must keep both of them and must
; not reorder them relative to each other.
%a = load volatile i32, ptr addrspace(1) %gep
56+
%b = load volatile i32, ptr addrspace(1) %b_ptr
57+
%result = add i32 %a, %b
58+
store i32 %result, ptr addrspace(1) %out
59+
ret void
60+
}
61+
62+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
63+
; GFX6: {{.*}}

llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
291291
%gep = getelementptr inbounds i8, ptr %base, i64 %mul
292292
ret ptr %gep
293293
}
294+
295+
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr.
296+
define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) {
297+
; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
298+
; GFX942_PTRADD: ; %bb.0: ; %entry
299+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
300+
; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
301+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
302+
; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
303+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
304+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
305+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
306+
; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
307+
; GFX942_PTRADD-NEXT: s_endpgm
308+
;
309+
; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
310+
; GFX942_LEGACY: ; %bb.0: ; %entry
311+
; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
312+
; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
313+
; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
314+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
315+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
316+
; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
317+
; GFX942_LEGACY-NEXT: s_endpgm
318+
; Stores the constant 1 to %p + zext(workitem id << 2) + 16, i.e. a kernel
; argument base plus a per-workitem offset plus a constant.
; Both check blocks keep the constant as an immediate offset of 16 on the
; store. The LEGACY checks address the store with s[0:1] as base and v0 as
; offset, whereas the PTRADD checks first build the 64-bit address in v[0:1]
; with v_lshl_add_u64 and store with "off" in place of an SGPR base.
entry:
319+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
320+
%shift = shl i32 %tid, 2
321+
%voffset = zext i32 %shift to i64
322+
; First GEP adds the varying offset, second GEP adds the constant 16.
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %p, i64 %voffset
323+
%gep2 = getelementptr inbounds i8, ptr addrspace(1) %gep1, i64 16
324+
store i32 1, ptr addrspace(1) %gep2
325+
ret void
326+
}
327+
328+
; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
329+
; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset.
330+
define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset, ptr addrspace(1) %r) {
331+
; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
332+
; GFX942_PTRADD: ; %bb.0:
333+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
334+
; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
335+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
336+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
337+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
338+
; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
339+
; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
340+
; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
341+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
342+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
343+
; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
344+
; GFX942_PTRADD-NEXT: s_endpgm
345+
;
346+
; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
347+
; GFX942_LEGACY: ; %bb.0:
348+
; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
349+
; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
350+
; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
351+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
352+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
353+
; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
354+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
355+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
356+
; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
357+
; GFX942_LEGACY-NEXT: s_endpgm
358+
; Loads an i32 from %sbase plus the zero-extended kernel argument %soffset
; and stores it, bitcast to float, to %r.
; The LEGACY checks pass the offset register s6 directly as an operand of
; s_load_dword, whereas the PTRADD checks add it to the base with
; s_add_u32/s_addc_u32 first and then load with a plain 0x0 offset.
%zext.offset = zext i32 %soffset to i64
359+
%gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
360+
%load = load i32, ptr addrspace(1) %gep0
361+
%to.vgpr = bitcast i32 %load to float
362+
store float %to.vgpr, ptr addrspace(1) %r
363+
ret void
364+
}
365+
366+
; Adjusted from llvm.amdgcn.global.load.lds.ll. Tests the offset lowering for
367+
; Intrinsic::amdgcn_global_load_lds.
368+
define void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
369+
; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
370+
; GFX942_PTRADD: ; %bb.0: ; %main_body
371+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
373+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
374+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
375+
; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
376+
; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
377+
; GFX942_PTRADD-NEXT: s_nop 0
378+
; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
379+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
380+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
381+
;
382+
; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
383+
; GFX942_LEGACY: ; %bb.0: ; %main_body
384+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385+
; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
386+
; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
387+
; GFX942_LEGACY-NEXT: s_nop 0
388+
; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
389+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
390+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
391+
; Calls llvm.amdgcn.global.load.lds with a global pointer formed from the
; inreg argument %gptr plus the zero-extended %voffset, and %lptr as the LDS
; pointer.
; The LEGACY checks address the instruction with s[0:1] as base and v1 as
; offset, whereas the PTRADD checks first build the 64-bit address in v[2:3]
; with v_lshl_add_u64 and use "off" in place of an SGPR base.
main_body:
392+
%voffset.64 = zext i32 %voffset to i64
393+
%gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
394+
; The constant 48 passed here shows up as the immediate offset of 48 on
; global_load_lds_dword in both check blocks.
; NOTE(review): the 4 presumably is the transfer size in bytes (dword) and
; the 16 an aux/cache-policy value matching the sc1 modifier - confirm
; against the intrinsic definition.
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16)
395+
ret void
396+
}
397+
398+
; Taken from shl_add_ptr_global.ll. Tests PTRADD handling in
399+
; SITargetLowering::performSHLPtrCombine.
400+
define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) {
401+
; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
402+
; GFX942_PTRADD: ; %bb.0:
403+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404+
; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
405+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
406+
; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
407+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
408+
; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
409+
; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
410+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
411+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
412+
;
413+
; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
414+
; GFX942_LEGACY: ; %bb.0:
415+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416+
; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
417+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
418+
; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
419+
; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
420+
; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
421+
; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
422+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
423+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
424+
; Computes (%ptr + 128) << 2, uses the result as the address of an atomic
; fadd of 100.0, and separately stores the unshifted %ptr + 128 to
; %extra.use. %out is not referenced in the body.
; In the LEGACY checks the shift is applied to %ptr directly and the constant
; becomes the immediate offset of 512 (0x80 << 2) on the atomic, with the
; unshifted sum computed separately for the store. The PTRADD checks add 0x80
; first, shift that sum, and use no immediate offset on the atomic.
; Element 32 of an i32 array is 128 (0x80) bytes past %ptr.
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
425+
%cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
426+
%shl = shl i64 %cast, 2
427+
%castback = inttoptr i64 %shl to ptr addrspace(1)
428+
; The result of the atomic is not used; 100.0 is the 0x42c80000 constant in
; the check blocks.
%unused = atomicrmw fadd ptr addrspace(1) %castback, float 100.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
429+
; Second use of the unshifted sum; volatile, so the store must be kept.
store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
430+
ret void
431+
}
432+
433+
; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
434+
; TargetLowering::ShrinkDemandedOp.
435+
define i32 @gep_in_const_as_cast_to_const32_as(ptr addrspace(4) %src, i64 %offset) {
436+
; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
437+
; GFX942_PTRADD: ; %bb.0: ; %entry
438+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439+
; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
440+
; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
441+
; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
442+
; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
443+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
444+
; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
445+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
446+
;
447+
; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
448+
; GFX942_LEGACY: ; %bb.0: ; %entry
449+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450+
; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
451+
; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
452+
; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
453+
; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
454+
; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
455+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
456+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
457+
; Offsets %src (addrspace(4)) by the 64-bit byte count %offset, casts the
; result to addrspace(6) and returns the i32 loaded through it.
; In both check blocks only v0, the low half of the sum, reaches the address
; of s_load_dword, and s1 is set to 0. The LEGACY checks compute the sum with
; a 32-bit v_add_u32, the PTRADD checks with a 64-bit v_lshl_add_u64.
entry:
458+
%gep = getelementptr i8, ptr addrspace(4) %src, i64 %offset
459+
%gep.cast = addrspacecast ptr addrspace(4) %gep to ptr addrspace(6)
460+
%l = load i32, ptr addrspace(6) %gep.cast
461+
ret i32 %l
462+
}
463+
464+
; Zero-initialized [16 x i32] constant in addrspace(4); it is the memcpy
; source in replace_const0_memcpy_by_memset below.
@CG = addrspace(4) constant [16 x i32] zeroinitializer, align 4
465+
466+
; Test PTRADD handling in isMemSrcFromConstant.
467+
define void @replace_const0_memcpy_by_memset(ptr align 4 %dst) {
468+
; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
469+
; GFX942_PTRADD: ; %bb.0: ; %entry
470+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471+
; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
472+
; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
473+
; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
474+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
475+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
476+
; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
477+
; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
478+
; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
479+
; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
480+
; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
481+
; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
482+
;
483+
; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
484+
; GFX942_LEGACY: ; %bb.0: ; %entry
485+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
487+
; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
488+
; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
489+
; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
490+
; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
491+
; Copies 8 bytes from @CG + 4 to %dst, where @CG is a zeroinitializer
; constant.
; The LEGACY checks store zeros directly (v2 = 0, v3 = v2) and never read
; @CG. The PTRADD checks instead fetch the address of @CG through the GOT
; and load the 8 bytes at offset 0x4 before storing them.
entry:
492+
%gep = getelementptr i8, ptr addrspace(4) @CG, i64 4
493+
tail call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 4 %dst, ptr addrspace(4) noundef nonnull align 4 %gep, i64 8, i1 false)
494+
ret void
495+
}
496+
497+
; memcpy intrinsic with a generic (addrspace 0) destination, an addrspace(4)
; source and an i64 length; called by replace_const0_memcpy_by_memset.
declare void @llvm.memcpy.p0.p4.i64(ptr noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg)
498+
499+
; Empty metadata node; operand of the !amdgpu.no.fine.grained.memory and
; !amdgpu.ignore.denormal.mode attachments on the atomicrmw in
; shl_base_global_ptr_global_atomic_fadd.
!0 = !{}

0 commit comments

Comments
 (0)