Skip to content

Commit 384af59

Browse files
jgu222 authored and igcbot committed
Correctly set return size of 2d block read
A 2D block read requires each block to be a multiple of the GRF size, so the total return size must also be a multiple of the GRF size. This change makes sure the computed return size is a multiple of the GRF size, which is required for correctness.
1 parent f8b61ac commit 384af59

File tree

4 files changed

+254
-29
lines changed

4 files changed

+254
-29
lines changed

IGC/Compiler/CISACodeGen/CShader.cpp

+67
Original file line numberDiff line numberDiff line change
@@ -1721,6 +1721,73 @@ uint CShader::GetNbElementAndMask(llvm::Value* value, uint32_t& mask)
17211721
// Number elements = {num GRFs} * {num DWords in GRF} = {num GRFs} * 8;
17221722
return int_cast<unsigned int>(cast<ConstantInt>(numGRFs)->getZExtValue() * 8);
17231723
}
1724+
case GenISAIntrinsic::GenISA_LSC2DBlockRead:
1725+
case GenISAIntrinsic::GenISA_LSC2DBlockReadAddrPayload:
1726+
{
1727+
// 2D block read requires its block size to be multiple of GRF size.
1728+
uint32_t eltBits, blkWidth, blkHeight, numBlks;
1729+
bool isTranspose, isVnni;
1730+
if (IID == GenISAIntrinsic::GenISA_LSC2DBlockRead)
1731+
{
1732+
eltBits = (uint32_t)cast<ConstantInt>(inst->getOperand(6))->getZExtValue();
1733+
blkWidth = (uint32_t)cast<ConstantInt>(inst->getOperand(7))->getZExtValue();
1734+
blkHeight = (uint32_t)cast<ConstantInt>(inst->getOperand(8))->getZExtValue();
1735+
numBlks = (uint32_t)cast<ConstantInt>(inst->getOperand(9))->getZExtValue();
1736+
isTranspose = (uint)cast<ConstantInt>(inst->getOperand(10))->getZExtValue();
1737+
isVnni = (uint)cast<ConstantInt>(inst->getOperand(11))->getZExtValue();
1738+
}
1739+
else
1740+
{
1741+
IGC_ASSERT(IID == GenISAIntrinsic::GenISA_LSC2DBlockReadAddrPayload);
1742+
eltBits = (uint32_t)cast<ConstantInt>(inst->getOperand(3))->getZExtValue();
1743+
blkWidth = (uint32_t)cast<ConstantInt>(inst->getOperand(4))->getZExtValue();
1744+
blkHeight = (uint32_t)cast<ConstantInt>(inst->getOperand(5))->getZExtValue();
1745+
numBlks = (uint32_t)cast<ConstantInt>(inst->getOperand(6))->getZExtValue();
1746+
isTranspose = cast<ConstantInt>(inst->getOperand(7))->getZExtValue();
1747+
isVnni = cast<ConstantInt>(inst->getOperand(8))->getZExtValue();
1748+
}
1749+
1750+
// Width is padded to the next power-of-2 value
1751+
uint32_t blkWidthCeil = (uint32_t)PowerOf2Ceil(blkWidth);
1752+
if (blkWidthCeil != blkWidth)
1753+
{
1754+
m_ctx->EmitWarning("Block2D: block width not power of 2, zero padded.");
1755+
}
1756+
uint32_t blkHeightCeil = blkHeight;
1757+
if (isTranspose)
1758+
{
1759+
blkHeightCeil = (uint32_t)PowerOf2Ceil(blkHeight);
1760+
if (blkHeightCeil != blkHeight)
1761+
{
1762+
m_ctx->EmitWarning("Block2D: transpose block height not power of 2, zero padded.");
1763+
}
1764+
}
1765+
if (isVnni)
1766+
{
1767+
IGC_ASSERT(eltBits == 16 || eltBits == 8);
1768+
uint32_t N = 32 / eltBits;
1769+
uint32_t origVal = blkHeightCeil;
1770+
blkHeightCeil = (uint32_t)divideCeil(blkHeightCeil, N) * N;
1771+
if (blkHeightCeil != origVal)
1772+
{
1773+
m_ctx->EmitWarning("Block2D: transform block height not multiple "
1774+
"of N (32/eltBits), zero padded.");
1775+
}
1776+
}
1777+
uint32_t blkBits = blkWidthCeil * blkHeightCeil * eltBits;
1778+
uint32_t numGRFsPerBlk = (uint32_t)divideCeil(blkBits, getGRFSize() * 8);
1779+
uint32_t blkBitsCeil = getGRFSize() * 8 * numGRFsPerBlk;
1780+
if (blkBitsCeil != blkBits)
1781+
{
1782+
m_ctx->EmitWarning("Block2D: block size not multiple of GRF size, zero padded.");
1783+
}
1784+
uint32_t numGRFs = numGRFsPerBlk * numBlks;
1785+
VISA_Type visaTy = GetType(inst->getType());
1786+
uint32_t eltTyBytes = CEncoder::GetCISADataTypeSize(visaTy);
1787+
uint32_t nbElement = (uint32_t)divideCeil(numGRFs * getGRFSize(), eltTyBytes);
1788+
1789+
return nbElement;
1790+
}
17241791
default:
17251792
break;
17261793
}

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

-29
Original file line numberDiff line numberDiff line change
@@ -24710,21 +24710,6 @@ void EmitPass::emitLSC2DBlockOperation(llvm::GenIntrinsicInst* inst)
2471024710
bool isVnni = (uint)cast<ConstantInt>(inst->getOperand(11))->getZExtValue();
2471124711

2471224712
CVariable* destination = m_destination;
24713-
if (numBlocksV == 2 && blockHeight == 1 &&
24714-
!isPrefetch &&
24715-
elemSizeInBits * blockWidth == 256 &&
24716-
m_currShader->m_Platform->getPlatformInfo().eProductFamily >= IGFX_PVC)
24717-
{
24718-
// m1 v2 block read has grf aligned V blocks,
24719-
// variable should be 2x larger
24720-
destination = m_currShader->GetNewVariable(
24721-
m_destination->GetNumberElement() * 2,
24722-
m_destination->GetType(),
24723-
m_destination->GetAlign(),
24724-
m_destination->IsUniform(),
24725-
m_destination->GetNumberInstance(),
24726-
CName::NONE);
24727-
}
2472824713
LSC_CACHE_OPTS cacheOpts = translateLSCCacheControlsFromValue(inst->getOperand(12), isRead);
2472924714

2473024715
if (isRead == false)
@@ -24755,20 +24740,6 @@ void EmitPass::emitLSC2DBlockOperation(llvm::GenIntrinsicInst* inst)
2475524740
pFlatImagePitch,
2475624741
cacheOpts);
2475724742
m_encoder->Push();
24758-
24759-
24760-
if (isRead &&
24761-
!isPrefetch &&
24762-
destination != m_destination)
24763-
{
24764-
// m1 v2 block read
24765-
m_encoder->Copy(m_destination, destination);
24766-
m_encoder->Push();
24767-
m_encoder->SetSrcSubVar(0, 1);
24768-
m_encoder->SetDstSubReg(m_destination->GetNumberElement() / 2);
24769-
m_encoder->Copy(m_destination, destination);
24770-
m_encoder->Push();
24771-
}
2477224743
return;
2477324744
}
2477424745

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; REQUIRES: regkeys, llvm-14-plus
9+
10+
; RUN: igc_opt --opaque-pointers -platformpvc -igc-emit-visa -regkey EnableDebugging -simd-mode 16 %s | FileCheck %s
11+
; ------------------------------------------------
12+
; EmitVISAPass
13+
; ------------------------------------------------
14+
15+
;; Test read payload size for LSC2DBlockReadAddrPayload intrinsics
16+
;;
17+
18+
define spir_kernel void @test(i64 %b, i32 %x, i32 %y, i32 %k) {
19+
entry:
20+
;
21+
; case 0: u32_m7k1 transpose
22+
;
23+
; CHECK: Block2D: transpose block height not power of 2, zero padded.
24+
; Read operands: 32-bit elements, width 1, height 7, 1 block, transpose=true, vnni=false.
25+
%ap = call i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64 %b, i32 0, i32 1023, i32 1023, i32 %x, i32 %y, i32 1, i32 7, i32 1)
26+
%val = call i32 @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.p0i32(i32* %ap, i32 0, i32 0, i32 32, i32 1, i32 7, i32 1, i1 true, i1 false, i32 0)
27+
%tmp0 = add i64 %b, 1024
28+
%addr0 = inttoptr i64 %tmp0 to i32 addrspace(1)*
29+
store i32 %val, i32 addrspace(1)* %addr0, align 4
30+
31+
32+
;
33+
; case 1: u16_m15k2 vnni transform
34+
;
35+
; CHECK: Block2D: block size not multiple of GRF size, zero padded
36+
; CHECK: Block2D: transform block height not multiple of N (32/eltBits), zero padded
37+
; Read operands: 16-bit elements, width 2, height 15, 1 block, transpose=false, vnni=true. The read goes through %ap1, the payload created for this case, not case 0's %ap.
38+
%ap1 = call i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64 %b, i32 0, i32 1023, i32 1023, i32 %x, i32 %y, i32 2, i32 15, i32 1)
39+
%val1 = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.p0i32.v2i16(i32* %ap1, i32 0, i32 0, i32 16, i32 2, i32 15, i32 1, i1 false, i1 true, i32 0)
40+
%tmp1 = add i64 %b, 2048
41+
%val10 = bitcast <2 x i16> %val1 to i32
42+
%addr1 = inttoptr i64 %tmp1 to i32 addrspace(1)*
43+
store i32 %val10, i32 addrspace(1)* %addr1, align 4
44+
45+
ret void
46+
}
47+
48+
declare i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64, i32, i32, i32, i32, i32, i32, i32, i32)
49+
declare <2 x i16> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.p0i32.v2i16(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32)
50+
declare i32 @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32)
51+
52+
53+
!IGCMetadata = !{!0}
54+
!igc.functions = !{!21}
55+
56+
!0 = !{!"ModuleMD", !1}
57+
!1 = !{!"FuncMD", !2, !3}
58+
!2 = !{!"FuncMDMap[0]", void (i64, i32, i32, i32)* @test}
59+
!3 = !{!"FuncMDValue[0]", !4, !17}
60+
!4 = !{!"resAllocMD", !5}
61+
!5 = !{!"argAllocMDList", !6, !10, !11, !14, !15, !16}
62+
!6 = !{!"argAllocMDListVec[0]", !7, !8, !9}
63+
!7 = !{!"type", i32 0}
64+
!8 = !{!"extensionType", i32 -1}
65+
!9 = !{!"indexType", i32 -1}
66+
!10 = !{!"argAllocMDListVec[1]", !7, !8, !9}
67+
!11 = !{!"argAllocMDListVec[2]", !12, !8, !13}
68+
!12 = !{!"type", i32 1}
69+
!13 = !{!"indexType", i32 0}
70+
!14 = !{!"argAllocMDListVec[3]", !7, !8, !9}
71+
!15 = !{!"argAllocMDListVec[4]", !7, !8, !9}
72+
!16 = !{!"argAllocMDListVec[5]", !7, !8, !9}
73+
!17 = !{!"m_OpenCLArgTypeQualifiers", !18, !19, !20}
74+
!18 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
75+
!19 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
76+
!20 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
77+
!21 = !{void (i64, i32, i32, i32)* @test, !22}
78+
!22 = !{!23, !24}
79+
!23 = !{!"function_type", i32 0}
80+
!24 = !{!"sub_group_size", i32 16}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; This test runs vISA EmitPass and checks if 2d block read needs zero padding.
9+
; It checks warning messages
10+
11+
; REQUIRES: llvm-14-plus, regkeys
12+
13+
; RUN: igc_opt --opaque-pointers -platformpvc -igc-emit-visa %s -regkey EnableDebugging \
14+
; RUN: -simd-mode 16 | FileCheck %s
15+
16+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
17+
target triple = "spir64-unknown-unknown"
18+
19+
;
20+
; Function Attrs: convergent nounwind null_pointer_is_valid
21+
define spir_kernel void @test_2dblock_read_zero_padding(i16 addrspace(1)* align 2 %dst, i64 %base, i32 %widthm1, i32 %heightm1, i32 %pitchm1, i32 %x, i32 %y, <8 x i32> %r0, <8 x i32> %payloadHeader, <3 x i32> %enqueuedLocalSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ, i32 %bufferOffset) #1 {
22+
entry:
23+
%ibase = ptrtoint i16 addrspace(1)* %dst to i64
24+
%lid = zext i16 %localIdX to i64
25+
%tmp = add i64 %ibase, %lid
26+
; Each case below issues one LSC2DBlockRead call and stores its result to an address derived from %tmp.
27+
; case 0 u32_m1k8v2
28+
; CHECK: warning: Block2D: block size not multiple of GRF size, zero padded
29+
; Operands: 32-bit elements, width 8, height 1, 2 blocks, no transpose, no vnni; each block is 8*1*32 = 256 bits (presumably smaller than one GRF on this platform - confirm GRF size).
30+
%res0 = call <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64 %base, i32 %widthm1, i32 %heightm1, i32 %pitchm1, i32 %x, i32 %y, i32 32, i32 8, i32 1, i32 2, i1 false, i1 false, i32 0)
31+
%tmp0 = add i64 %tmp, 16
32+
%addr0 = inttoptr i64 %tmp0 to <2 x i32> addrspace(1)*
33+
store <2 x i32> %res0, <2 x i32> addrspace(1)* %addr0, align 8
34+
35+
;
36+
; case 1 u32_m7k2 transpose
37+
; CHECK: warning: Block2D: transpose block height not power of 2, zero padded
38+
; Operands: 32-bit elements, width 2, height 7, 1 block, transpose set; height 7 is not a power of 2.
39+
%res1 = call i32 @llvm.genx.GenISA.LSC2DBlockRead.i32(i64 %base, i32 %widthm1, i32 %heightm1, i32 %pitchm1, i32 %x, i32 %y, i32 32, i32 2, i32 7, i32 1, i1 true, i1 false, i32 0)
40+
%tmp1 = add i64 %tmp, 128
41+
%addr1 = inttoptr i64 %tmp1 to i32 addrspace(1)*
42+
store i32 %res1, i32 addrspace(1)* %addr1, align 8
43+
44+
;
45+
; case 2 u8_m29k2 vnni transform
46+
; CHECK: warning: Block2D: transform block height not multiple of N (32/eltBits), zero padded.
47+
; Operands: 8-bit elements, width 2, height 29, 1 block, vnni set; 29 is not a multiple of N = 32/8 = 4.
48+
%res2 = call <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64 %base, i32 %widthm1, i32 %heightm1, i32 %pitchm1, i32 %x, i32 %y, i32 8, i32 2, i32 29, i32 1, i1 false, i1 true, i32 0)
49+
%tmp2 = add i64 %tmp, 512
50+
%res20 = bitcast <2 x i16> %res2 to i32
51+
%addr2 = inttoptr i64 %tmp2 to i32 addrspace(1)*
52+
store i32 %res20, i32 addrspace(1)* %addr2, align 8
53+
54+
ret void
55+
}
56+
57+
declare <2 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v2i16(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
58+
declare <2 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v2i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
59+
declare i32 @llvm.genx.GenISA.LSC2DBlockRead.i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
60+
61+
!igc.functions = !{!3}
62+
!IGCMetadata = !{!16}
63+
64+
!3 = !{void (i16 addrspace(1)*, i64, i32, i32, i32, i32, i32, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i32)* @test_2dblock_read_zero_padding, !4}
65+
!4 = !{!5, !6, !15}
66+
!5 = !{!"function_type", i32 0}
67+
!6 = !{!"implicit_arg_desc", !7, !8, !9, !10, !11, !12, !13}
68+
!7 = !{i32 0}
69+
!8 = !{i32 1}
70+
!9 = !{i32 7}
71+
!10 = !{i32 8}
72+
!11 = !{i32 9}
73+
!12 = !{i32 10}
74+
!13 = !{i32 15, !14}
75+
!14 = !{!"explicit_arg_num", i32 0}
76+
!15 = !{!"sub_group_size", i32 16}
77+
!16 = !{!"ModuleMD", !131}
78+
!131 = !{!"FuncMD", !132, !133}
79+
!132 = !{!"FuncMDMap[0]", void (i16 addrspace(1)*, i64, i32, i32, i32, i32, i32, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i32)* @test_2dblock_read_zero_padding}
80+
!133 = !{!"FuncMDValue[0]", !166, !237}
81+
!166 = !{!"resAllocMD", !170}
82+
!170 = !{!"argAllocMDList", !171, !175, !176, !177, !178, !179, !180, !181, !182, !183, !184, !185, !186, !187}
83+
!171 = !{!"argAllocMDListVec[0]", !172, !173, !174}
84+
!172 = !{!"type", i32 0}
85+
!173 = !{!"extensionType", i32 -1}
86+
!174 = !{!"indexType", i32 -1}
87+
!175 = !{!"argAllocMDListVec[1]", !172, !173, !174}
88+
!176 = !{!"argAllocMDListVec[2]", !172, !173, !174}
89+
!177 = !{!"argAllocMDListVec[3]", !172, !173, !174}
90+
!178 = !{!"argAllocMDListVec[4]", !172, !173, !174}
91+
!179 = !{!"argAllocMDListVec[5]", !172, !173, !174}
92+
!180 = !{!"argAllocMDListVec[6]", !172, !173, !174}
93+
!181 = !{!"argAllocMDListVec[7]", !172, !173, !174}
94+
!182 = !{!"argAllocMDListVec[8]", !172, !173, !174}
95+
!183 = !{!"argAllocMDListVec[9]", !172, !173, !174}
96+
!184 = !{!"argAllocMDListVec[10]", !172, !173, !174}
97+
!185 = !{!"argAllocMDListVec[11]", !172, !173, !174}
98+
!186 = !{!"argAllocMDListVec[12]", !172, !173, !174}
99+
!187 = !{!"argAllocMDListVec[13]", !172, !173, !174}
100+
!237 = !{!"m_OpenCLArgTypeQualifiers", !238, !239, !240, !241, !242, !243, !244}
101+
!238 = !{!"m_OpenCLArgTypeQualifiersVec[0]", !""}
102+
!239 = !{!"m_OpenCLArgTypeQualifiersVec[1]", !""}
103+
!240 = !{!"m_OpenCLArgTypeQualifiersVec[2]", !""}
104+
!241 = !{!"m_OpenCLArgTypeQualifiersVec[3]", !""}
105+
!242 = !{!"m_OpenCLArgTypeQualifiersVec[4]", !""}
106+
!243 = !{!"m_OpenCLArgTypeQualifiersVec[5]", !""}
107+
!244 = !{!"m_OpenCLArgTypeQualifiersVec[6]", !""}

0 commit comments

Comments
 (0)