Skip to content

Commit ebfc688

Browse files
MaciejKalinski, gfxbot
authored and committed
New intrinsic GenISA_URBReadOutput added, required by a new Vulkan feature.
Intended for URB output data loads, using the URB write handle provided in the R0 header. Change-Id: I2a0cb3cacb1e331d6aa078b81bf3e236a76912db
1 parent 83c14ed commit ebfc688

File tree

5 files changed

+81
-77
lines changed

5 files changed

+81
-77
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

+51-2
Original file line numberDiff line numberDiff line change
@@ -5873,6 +5873,46 @@ void EmitPass::emitURBRead(llvm::GenIntrinsicInst* inst)
58735873
m_currShader->isInputsPulled = true;
58745874
}
58755875

5876+
/// Emits a SIMD8 URB read message whose payload carries the current shader's
/// URB output handle (as returned by GetURBOutputHandle()).
///
/// \param globalOffset       Offset whose Count() is forwarded to UrbMessage() when
///                           building the message descriptor.
/// \param pPerSlotOffsetVar  Optional per-slot offsets. When non-null they are copied
///                           into sub-variable 1 of the payload and the per-slot-offset
///                           flag is passed to UrbMessage(); when null the payload holds
///                           only the URB handles.
/// \param pDest              Destination variable of the send. The response length is
///                           derived from its element count divided by the SIMD lane count.
void EmitPass::emitURBReadOutput(QuadEltUnit globalOffset, CVariable* pPerSlotOffsetVar, CVariable* pDest)
5877+
{
5878+
const bool hasPerSlotOffsets = pPerSlotOffsetVar != nullptr;
5879+
// Payload size is just URB handles (1 GRF) or URB handles and per-slot offsets (2 GRFs).
5880+
const Unit<Element> payloadSize(hasPerSlotOffsets ? 2 : 1);
5881+
5882+
// Allocate a GRF-aligned UD payload with one SIMD-width row per payload register.
CVariable* pPayload =
5883+
m_currShader->GetNewVariable(payloadSize.Count() * numLanes(m_SimdMode), ISA_TYPE_UD, EALIGN_GRF);
5884+
5885+
// get the register with URBHandles
5886+
m_encoder->Copy(pPayload, m_currShader->GetURBOutputHandle());
5887+
m_encoder->Push();
5888+
5889+
// If we have runtime value in per-slot offsets, we need to copy per-slot offsets to payload
5890+
if (hasPerSlotOffsets)
5891+
{
5892+
// Target sub-variable 1 of the payload so the handles copied above are not overwritten.
m_encoder->SetDstSubVar(1);
5893+
m_encoder->Copy(pPayload, pPerSlotOffsetVar);
5894+
m_encoder->Push();
5895+
}
5896+
5897+
// This read never ends the thread, so the EOT bit in exDesc below is never set.
constexpr bool eot = false;
5898+
const Unit<Element> messageLength = payloadSize;
5899+
// Response length is the destination element count expressed in SIMD-width rows.
const Unit<Element> responseLength(pDest->GetNumberElement() / numLanes(m_SimdMode));
5900+
const uint desc = UrbMessage(
5901+
messageLength.Count(),
5902+
responseLength.Count(),
5903+
eot,
5904+
hasPerSlotOffsets,
5905+
false,
5906+
globalOffset.Count(),
5907+
EU_GEN8_URB_OPCODE_SIMD8_READ);
5908+
5909+
const uint exDesc = EU_MESSAGE_TARGET_URB | (eot ? 1 << 5 : 0);
5910+
// The descriptor is passed to Send() as an immediate UD variable.
CVariable* pMessDesc = m_currShader->ImmToVariable(desc, ISA_TYPE_UD);
5911+
5912+
m_encoder->Send(pDest, pPayload, exDesc, pMessDesc);
5913+
m_encoder->Push();
5914+
}
5915+
58765916
void EmitPass::emitURBWrite(llvm::GenIntrinsicInst* inst)
58775917
{
58785918
// input: GenISA_URBWrite(%offset, %mask, %data0, ..., %data7)
@@ -7142,6 +7182,9 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
71427182
case GenISAIntrinsic::GenISA_URBRead:
71437183
emitURBRead(inst);
71447184
break;
7185+
case GenISAIntrinsic::GenISA_URBReadOutput:
7186+
emitURBReadOutput(QuadEltUnit(0), GetSymbol(inst->getOperand(0)), m_destination);
7187+
break;
71457188
case GenISAIntrinsic::GenISA_cycleCounter:
71467189
emitcycleCounter(inst);
71477190
break;
@@ -12162,14 +12205,20 @@ void EmitPass::emitHSPatchConstantInput(llvm::Instruction* pInst)
1216212205
{
1216312206
assert(m_currShader->GetShaderType() == ShaderType::HULL_SHADER);
1216412207
CHullShader* hsProgram = static_cast<CHullShader*>(m_currShader);
12165-
hsProgram->EmitPatchConstantInput(pInst, m_destination);
12208+
QuadEltUnit attributeOffset(0);
12209+
CVariable* pPerSlotOffsetVar = nullptr;
12210+
hsProgram->EmitPatchConstantInput(pInst, attributeOffset, pPerSlotOffsetVar);
12211+
emitURBReadOutput(attributeOffset, pPerSlotOffsetVar, m_destination);
1216612212
}
1216712213

1216812214
void EmitPass::emitHSOutputControlPtInput(llvm::Instruction* pInst)
1216912215
{
1217012216
assert(m_currShader->GetShaderType() == ShaderType::HULL_SHADER);
1217112217
CHullShader* hsProgram = static_cast<CHullShader*>(m_currShader);
12172-
hsProgram->EmitOutputControlPointInput(pInst, m_destination);
12218+
QuadEltUnit attributeOffset(0);
12219+
CVariable* pPerSlotOffsetVar = nullptr;
12220+
hsProgram->EmitOutputControlPointInput(pInst, attributeOffset, pPerSlotOffsetVar);
12221+
emitURBReadOutput(attributeOffset, pPerSlotOffsetVar, m_destination);
1217312222
}
1217412223

1217512224
void EmitPass::emitHSTessFactors(llvm::Instruction* pInst)

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ class EmitPass : public llvm::FunctionPass
216216
void emitMediaBlockRectangleRead(llvm::Instruction* inst);
217217
void emitURBWrite(llvm::GenIntrinsicInst* inst);
218218
void emitURBRead(llvm::GenIntrinsicInst* inst);
219+
void emitURBReadOutput(QuadEltUnit globalOffset, CVariable* pPerSlotOffsetVar, CVariable* pDest);
219220
void emitSampleInstruction(llvm::SampleIntrinsic* inst);
220221
void emitLdInstruction(llvm::Instruction* inst);
221222
void emitInfoInstruction(llvm::InfoIntrinsic* inst);

IGC/Compiler/CISACodeGen/HullShaderCodeGen.cpp

+20-65
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ void CHullShader::AllocateEightPatchPayload()
192192

193193
assert(offset % getGRFSize() == 0);
194194
ProgramOutput()->m_startReg = offset / getGRFSize();
195-
195+
196196
// allocate space for NOS constants and pushed constants
197197
AllocateConstants3DShader(offset);;
198198

@@ -214,7 +214,7 @@ void CHullShader::AllocateSinglePatchPayload()
214214
uint offset = 0;
215215

216216
//R0 is always allocated as a predefined variable. Increase offset for R0
217-
assert(m_R0);
217+
assert(m_R0);
218218
offset += getGRFSize();
219219

220220
// if m_pURBReadHandlesReg != nullptr, then we need to allocate ( (m_pOutputControlPointCount - 1)/8 + 1 ) registers for input handles
@@ -303,7 +303,7 @@ CVariable* CHullShader::GetURBReadHandlesReg()
303303
{
304304
m_pURBReadHandlesReg = GetNewVariable(
305305
numLanes(m_SIMDSize) * ( m_pNumURBReadHandleGRF ),
306-
ISA_TYPE_UD,
306+
ISA_TYPE_UD,
307307
EALIGN_GRF);
308308
}
309309
return m_pURBReadHandlesReg;
@@ -362,18 +362,18 @@ CVariable* CHullShader::GetURBInputHandle(CVariable* pVertexIndex)
362362
}
363363
}
364364

365-
QuadEltUnit CHullShader::GetFinalGlobalOffet(QuadEltUnit globalOffset)
366-
{
365+
QuadEltUnit CHullShader::GetFinalGlobalOffet(QuadEltUnit globalOffset)
366+
{
367367
return globalOffset;
368368
}
369369

370370
uint32_t CHullShader::GetMaxNumOfPushedInputs() const
371-
{
371+
{
372372
uint numberOfPatches = (m_properties.m_pShaderDispatchMode == EIGHT_PATCH_DISPATCH_MODE) ? 8 : 1;
373373

374-
// Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
374+
// Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
375375
// in current dispatch mode for current topology ( InputPatch size ).
376-
uint32_t maxNumOfPushedInputAttributesPerICP =
376+
uint32_t maxNumOfPushedInputAttributesPerICP =
377377
m_pMaxNumOfPushedInputs / (m_properties.m_pInputControlPointCount*numberOfPatches);
378378

379379
// Input attributes can be pushed only in pairs, so we need to round down the limit.
@@ -383,28 +383,28 @@ uint32_t CHullShader::GetMaxNumOfPushedInputs() const
383383
// They can be pushed only in pairs.
384384
uint32_t reqNumOfInputAttributesPerICP = iSTD::Align(m_properties.m_pMaxInputSignatureCount, 2);
385385

386-
// TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
386+
// TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
387387
// in case of SGV inputs have to be taken into consideration (will be done in next step).
388388
// reqNumOfInputAttributes += HeaderSize().Count();
389389

390390
// Determine ACTUAL number of attributes that can be pushed.
391391
// If the required number of input attributes is less than the maximum potential number,
392392
// then all of them will be pushed.
393-
uint32_t actualNumOfPushedInputAttributesPerICP =
393+
uint32_t actualNumOfPushedInputAttributesPerICP =
394394
iSTD::Min(reqNumOfInputAttributesPerICP, maxNumOfPushedInputAttributesPerICP);
395395

396396
return actualNumOfPushedInputAttributesPerICP;
397397
}
398398

399-
void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pDest)
399+
void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar)
400400
{
401401
bool readHeader = ((dyn_cast<GenIntrinsicInst>(pInst))->getIntrinsicID() == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead);
402402

403403
// patch constant input read
404404
llvm::Value* pIndirectVertexIdx = pInst->getOperand(0);
405405

406-
CVariable* pPerSlotOffsetVar = nullptr;
407-
QuadEltUnit attributeOffset(0);
406+
pPerSlotOffsetVar = nullptr;
407+
attributeOffset = QuadEltUnit(0);
408408

409409
// {BDW - WA, HS} Do not set pPerSlotOffset or change globalOffset to read TessFactors from URB.
410410
if (!readHeader)
@@ -422,25 +422,23 @@ void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pD
422422

423423
attributeOffset = attributeOffset + GetURBHeaderSize();
424424
}
425-
426-
URBReadPatchConstOrOutputCntrlPtInput(pPerSlotOffsetVar, attributeOffset, false, pDest);
427425
}
428426

429-
void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariable* pDest)
427+
void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar)
430428
{
431429
// patch constant input read
432430
llvm::Value* pIndirectVertexIdx = pInst->getOperand(0);
433431
llvm::Value* pAttribIdx = pInst->getOperand(1);
434432

435-
CVariable* pPerSlotOffsetVar = nullptr;
436-
QuadEltUnit attributeOffset(GetPatchConstantOutputSize());
433+
pPerSlotOffsetVar = nullptr;
434+
attributeOffset = GetPatchConstantOutputSize();
437435

438436
// Compute offset from vertex index
439437
if (llvm::ConstantInt* pConstVertexIdx = llvm::dyn_cast<llvm::ConstantInt>(pIndirectVertexIdx))
440438
{
441439
// attribute index is a constant, we can compute the URB read offset directly
442-
attributeOffset =
443-
attributeOffset +
440+
attributeOffset =
441+
attributeOffset +
444442
QuadEltUnit(int_cast<unsigned int>(pConstVertexIdx->getZExtValue())) * m_properties.m_pMaxOutputSignatureCount;
445443
}
446444
else
@@ -459,7 +457,7 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
459457
}
460458
}
461459

462-
// Compute additionall offset coming from atribute index
460+
// Compute additional offset coming from attribute index
463461
if (llvm::ConstantInt* pConstAttribIdx = llvm::dyn_cast<llvm::ConstantInt>(pAttribIdx))
464462
{
465463
// attribute offset is a constant, we can compute the URB read offset directly
@@ -482,53 +480,10 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
482480
pPerSlotOffsetVar = GetSymbol(pAttribIdx);
483481
}
484482
}
485-
486-
URBReadPatchConstOrOutputCntrlPtInput(pPerSlotOffsetVar, attributeOffset, false, pDest);
487-
}
488-
489-
void CHullShader::URBReadPatchConstOrOutputCntrlPtInput(
490-
CVariable* pPerSlotOffsetVar,
491-
QuadEltUnit globalOffset,
492-
bool EOT,
493-
CVariable* pDest )
494-
{
495-
CEncoder& encoder = GetEncoder();
496-
497-
const bool hasPerSlotOffsets = pPerSlotOffsetVar != nullptr;
498-
// Payload size is just URB handles (1 GRF) or URB handles and per-slot offsets (2 GRFs).
499-
const Unit<Element> payloadSize(hasPerSlotOffsets ? 2 : 1);
500-
CVariable* pPayload =
501-
GetNewVariable(payloadSize.Count() * numLanes(m_SIMDSize), ISA_TYPE_UD, EALIGN_GRF);
502-
503-
// get the register with URBHandles
504-
CopyVariable(pPayload, m_pURBWriteHandleReg);
505-
506-
// If we have runtime value in per-slot offsets, we need to copy per-slot offsets to payload
507-
if (hasPerSlotOffsets)
508-
{
509-
CopyVariable(pPayload, pPerSlotOffsetVar, 1);
510-
}
511-
512-
const Unit<Element> messageLength = payloadSize;
513-
const Unit<Element> responseLength(pDest->GetNumberElement()/numLanes(m_SIMDSize));
514-
const uint desc = UrbMessage(
515-
messageLength.Count(),
516-
responseLength.Count(),
517-
EOT,
518-
hasPerSlotOffsets,
519-
false,
520-
globalOffset.Count(),
521-
EU_GEN8_URB_OPCODE_SIMD8_READ);
522-
523-
const uint exDesc = EU_MESSAGE_TARGET_URB | (EOT ? 1 << 5 : 0);
524-
CVariable* pMessDesc = ImmToVariable(desc, ISA_TYPE_UD);
525-
526-
encoder.Send(pDest, pPayload, exDesc, pMessDesc);
527-
encoder.Push();
528483
}
529484

530485
/// Returns the size of the output vertex.
531-
/// Unit: 16B = 4 DWORDs
486+
/// Unit: 16B = 4 DWORDs
532487
/// Note: The PatchConstantOutput size must be 32B-aligned when rendering is enabled
533488
/// Therefore, the PatchConstantOutput size is also rounded up to a multiple of 2.
534489
QuadEltUnit CHullShader::GetPatchConstantOutputSize() const

IGC/Compiler/CISACodeGen/HullShaderCodeGen.hpp

+5-10
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ class CHullShader : public CShader
4040
/// Fills in the kernel program structure with data determined during compilation.
4141
void FillProgram(SHullShaderKernelProgram* pKernelProgram);
4242
void PreCompile();
43-
void EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pDest);
44-
void EmitOutputControlPointInput(llvm::Instruction* pInst, CVariable* pDest);
43+
void EmitPatchConstantInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar);
44+
void EmitOutputControlPointInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar);
4545
void ParseShaderSpecificOpcode(llvm::Instruction* inst);
4646
void AddPrologue();
4747

@@ -51,18 +51,13 @@ class CHullShader : public CShader
5151
void AllocateEightPatchPayload();
5252

5353
void SetShaderSpecificHelper(EmitPass* emitPass);
54-
54+
5555
CVariable* GetURBReadHandlesReg();
5656
CVariable* GetR1();
5757
CVariable* GetR2();
5858
virtual CVariable* GetURBInputHandle(CVariable* pVertexIndex);
5959
virtual QuadEltUnit GetFinalGlobalOffet(QuadEltUnit globalOffset);
6060
virtual uint32_t GetMaxNumOfPushedInputs() const;
61-
void URBReadPatchConstOrOutputCntrlPtInput(
62-
CVariable* pPerSlotOffset,
63-
QuadEltUnit globalOffset,
64-
bool EOT,
65-
CVariable* pDest);
6661

6762
void EmitPatchConstantHeader(
6863
CVariable* var[],
@@ -78,7 +73,7 @@ class CHullShader : public CShader
7873
OctEltUnit GetVertexURBEntryReadLength() const;
7974

8075
/// Returns a variable that stores URB write handle register
81-
virtual CVariable* GetURBOutputHandle();
76+
virtual CVariable* GetURBOutputHandle();
8277

8378
CVariable* GetPrimitiveID();
8479

@@ -108,7 +103,7 @@ class CHullShader : public CShader
108103
CVariable* m_pURBWriteHandleReg;
109104
CVariable* m_pURBReadHandlesReg; // used for vertex data pulled from URB
110105

111-
static const uint32_t m_pMaxNumOfPushedInputs; // holds max number of inputs that can be pushed for this shader unit
106+
static const uint32_t m_pMaxNumOfPushedInputs; // holds max number of inputs that can be pushed for this shader unit
112107
CVariable* m_IncludeVertexHandles;
113108
bool m_HasPrimitiveIDInstruction;
114109
uint32_t m_pNumURBReadHandleGRF;

IGC/GenISAIntrinsics/Intrinsic_definitions.py

+4
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,12 @@
257257
"int","bool","bool","bool","bool","int"],"None"],
258258
"GenISA_RTWrite": ["void",["anyfloat","float","bool",0,0,0,0,"float","float","int","int","bool",
259259
"bool","bool","bool","int"],"None"],
260+
# (owordOffset, mask, x1, y1, z1, w1, x2, y2, z2, w2)
260261
"GenISA_URBWrite": ["void",["int","int","float","float","float","float","float","float","float","float"],"None"],
262+
# (index, owordOffset)->float8
261263
"GenISA_URBRead": ["float8",["int","int"],"NoMem"],
264+
# In-place data read using URB Write Handle. (owordOffset)->float8
265+
"GenISA_URBReadOutput": ["float8",["int"],"NoMem"],
262266
"GenISA_SetDebugReg": ["int",["int"],"None"],
263267
"GenISA_add_pair": [["int","int"],["int","int","int","int"],"NoMem"],
264268
"GenISA_sub_pair": [["int","int"],["int","int","int","int"],"NoMem"],

0 commit comments

Comments
 (0)