Skip to content

Commit fd5fc76

Browse files
shiltian and rampitec authored
[AMDGPU] Add support for v_cos_bf16 on gfx1250 (#149355)
Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
1 parent 73d4cea commit fd5fc76

23 files changed

+819 -0 lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -675,6 +675,7 @@ TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
675675
TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
676676
TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
677677
TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts")
678+
TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts")
678679

679680
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
680681
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -433,6 +433,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
433433
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
434434
case AMDGPU::BI__builtin_amdgcn_cosf:
435435
case AMDGPU::BI__builtin_amdgcn_cosh:
436+
case AMDGPU::BI__builtin_amdgcn_cos_bf16:
436437
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
437438
case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
438439
return EmitAMDGPUDispatchPtr(*this, E);

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,25 @@ void test_sin_bf16(global __bf16* out, __bf16 a)
156156
*out = __builtin_amdgcn_sin_bf16(a);
157157
}
158158

159+
// CHECK-LABEL: @test_cos_bf16(
160+
// CHECK-NEXT: entry:
161+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
162+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
163+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
164+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
165+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
166+
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
167+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
168+
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.cos.bf16(bfloat [[TMP0]])
169+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
170+
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
171+
// CHECK-NEXT: ret void
172+
//
173+
void test_cos_bf16(global __bf16* out, __bf16 a)
174+
{
175+
*out = __builtin_amdgcn_cos_bf16(a);
176+
}
177+
159178
// CHECK-LABEL: @test_cvt_f16_fp8(
160179
// CHECK-NEXT: entry:
161180
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -535,6 +535,7 @@ defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
535535
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
536536
defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>;
537537
defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>;
538+
defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>;
538539
}
539540
} // End TRANS = 1, SchedRW = [WriteTrans32]
540541
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
@@ -1149,6 +1150,7 @@ defm V_RSQ_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
11491150
defm V_LOG_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;
11501151
defm V_EXP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>;
11511152
defm V_SIN_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07e>;
1153+
defm V_COS_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07f>;
11521154

11531155
//===----------------------------------------------------------------------===//
11541156
// GFX10.
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
2+
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
3+
4+
; FIXME: GlobalISel does not work with bf16
5+
6+
declare bfloat @llvm.amdgcn.cos.bf16(bfloat) #0
7+
8+
; GCN-LABEL: {{^}}cos_bf16:
9+
; GCN: v_cos_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
10+
define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
11+
%cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat %src) #0
12+
store bfloat %cos, ptr addrspace(1) %out, align 2
13+
ret void
14+
}
15+
16+
; GCN-LABEL: {{^}}cos_bf16_constant_4
17+
; GCN: v_cos_bf16_e32 v0, 4.0
18+
define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
19+
%cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 4.0) #0
20+
store bfloat %cos, ptr addrspace(1) %out, align 2
21+
ret void
22+
}
23+
24+
; GCN-LABEL: {{^}}cos_bf16_constant_100
25+
; GCN: v_cos_bf16_e32 {{v[0-9]+}}, 0x42c8
26+
define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 {
27+
%cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 100.0) #0
28+
store bfloat %cos, ptr addrspace(1) %out, align 2
29+
ret void
30+
}
31+
32+
attributes #0 = { nounwind readnone }
33+
attributes #1 = { nounwind }

llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,51 @@ v_sin_bf16 v5, src_scc
343343
v_sin_bf16 v127, 0x8000
344344
// GFX1250: v_sin_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00]
345345

346+
v_cos_bf16 v5, v1
347+
// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e]
348+
349+
v_cos_bf16 v5, v127
350+
// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e]
351+
352+
v_cos_bf16 v5, s1
353+
// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e]
354+
355+
v_cos_bf16 v5, s105
356+
// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e]
357+
358+
v_cos_bf16 v5, vcc_lo
359+
// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e]
360+
361+
v_cos_bf16 v5, vcc_hi
362+
// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e]
363+
364+
v_cos_bf16 v5, ttmp15
365+
// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e]
366+
367+
v_cos_bf16 v5, m0
368+
// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e]
369+
370+
v_cos_bf16 v5, exec_lo
371+
// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e]
372+
373+
v_cos_bf16 v5, exec_hi
374+
// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e]
375+
376+
v_cos_bf16 v5, null
377+
// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e]
378+
379+
v_cos_bf16 v5, -1
380+
// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e]
381+
382+
v_cos_bf16 v5, 0.5
383+
// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e]
384+
385+
v_cos_bf16 v5, src_scc
386+
// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e]
387+
388+
v_cos_bf16 v127, 0x8000
389+
// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
390+
346391
v_cvt_f32_bf16 v5, v1
347392
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
348393

llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -364,6 +364,54 @@ v_sin_bf16 v127, 0x8000
364364
v_sin_bf16 v5.h, v1.h
365365
// GFX1250: v_sin_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfd,0x0a,0x7f]
366366

367+
v_cos_bf16 v5, v1
368+
// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e]
369+
370+
v_cos_bf16 v5, v127
371+
// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e]
372+
373+
v_cos_bf16 v5, s1
374+
// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e]
375+
376+
v_cos_bf16 v5, s105
377+
// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e]
378+
379+
v_cos_bf16 v5, vcc_lo
380+
// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e]
381+
382+
v_cos_bf16 v5, vcc_hi
383+
// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e]
384+
385+
v_cos_bf16 v5, ttmp15
386+
// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e]
387+
388+
v_cos_bf16 v5, m0
389+
// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e]
390+
391+
v_cos_bf16 v5, exec_lo
392+
// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e]
393+
394+
v_cos_bf16 v5, exec_hi
395+
// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e]
396+
397+
v_cos_bf16 v5, null
398+
// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e]
399+
400+
v_cos_bf16 v5, -1
401+
// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e]
402+
403+
v_cos_bf16 v5, 0.5
404+
// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e]
405+
406+
v_cos_bf16 v5, src_scc
407+
// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e]
408+
409+
v_cos_bf16 v127, 0x8000
410+
// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00]
411+
412+
v_cos_bf16 v5.h, v1.h
413+
// GFX1250: v_cos_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xff,0x0a,0x7f]
414+
367415
v_cvt_f32_bf16 v5, v1
368416
// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e]
369417

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -394,6 +394,62 @@ v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi
394394
// GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
395395
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
396396

397+
v_cos_bf16 v5, v1 quad_perm:[3,2,1,0]
398+
// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
399+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
400+
401+
v_cos_bf16 v5, v1 quad_perm:[0,1,2,3]
402+
// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
403+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
404+
405+
v_cos_bf16 v5, v1 row_mirror
406+
// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
407+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
408+
409+
v_cos_bf16 v5, v1 row_half_mirror
410+
// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
411+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
412+
413+
v_cos_bf16 v5, v1 row_shl:1
414+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
415+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
416+
417+
v_cos_bf16 v5, v1 row_shl:15
418+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
419+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
420+
421+
v_cos_bf16 v5, v1 row_shr:1
422+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
423+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
424+
425+
v_cos_bf16 v5, v1 row_shr:15
426+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
427+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
428+
429+
v_cos_bf16 v5, v1 row_ror:1
430+
// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
431+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
432+
433+
v_cos_bf16 v5, v1 row_ror:15
434+
// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
435+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
436+
437+
v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
438+
// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
439+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
440+
441+
v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
442+
// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
443+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
444+
445+
v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
446+
// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
447+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
448+
449+
v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
450+
// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
451+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
452+
397453
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
398454
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
399455
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -422,6 +422,66 @@ v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
422422
// GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff]
423423
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
424424

425+
v_cos_bf16 v5, v1 quad_perm:[3,2,1,0]
426+
// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff]
427+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
428+
429+
v_cos_bf16 v5, v1 quad_perm:[0,1,2,3]
430+
// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff]
431+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
432+
433+
v_cos_bf16 v5, v1 row_mirror
434+
// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff]
435+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
436+
437+
v_cos_bf16 v5, v1 row_half_mirror
438+
// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff]
439+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
440+
441+
v_cos_bf16 v5, v1 row_shl:1
442+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff]
443+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
444+
445+
v_cos_bf16 v5, v1 row_shl:15
446+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff]
447+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
448+
449+
v_cos_bf16 v5, v1 row_shr:1
450+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff]
451+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
452+
453+
v_cos_bf16 v5, v1 row_shr:15
454+
// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff]
455+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
456+
457+
v_cos_bf16 v5, v1 row_ror:1
458+
// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff]
459+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
460+
461+
v_cos_bf16 v5, v1 row_ror:15
462+
// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff]
463+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
464+
465+
v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
466+
// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff]
467+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
468+
469+
v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
470+
// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01]
471+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
472+
473+
v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
474+
// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13]
475+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
476+
477+
v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
478+
// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
479+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
480+
481+
v_cos_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
482+
// GFX1250: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff]
483+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
484+
425485
v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0]
426486
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
427487
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,18 @@ v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
8686
// GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
8787
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
8888

89+
v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
90+
// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
91+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
92+
93+
v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
94+
// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05]
95+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
96+
97+
v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
98+
// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
99+
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
100+
89101
v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
90102
// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
91103
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)