@@ -192,7 +192,7 @@ void CHullShader::AllocateEightPatchPayload()
192
192
193
193
assert (offset % getGRFSize () == 0 );
194
194
ProgramOutput ()->m_startReg = offset / getGRFSize ();
195
-
195
+
196
196
// allocate space for NOS constants and pushed constants
197
197
AllocateConstants3DShader (offset);;
198
198
@@ -214,7 +214,7 @@ void CHullShader::AllocateSinglePatchPayload()
214
214
uint offset = 0 ;
215
215
216
216
// R0 is always allocated as a predefined variable. Increase offset for R0
217
- assert (m_R0);
217
+ assert (m_R0);
218
218
offset += getGRFSize ();
219
219
220
220
// if m_pURBReadHandlesReg != nullptr, then we need to allocate ( (m_pOutputControlPointCount - 1)/8 + 1 ) registers for input handles
@@ -303,7 +303,7 @@ CVariable* CHullShader::GetURBReadHandlesReg()
303
303
{
304
304
m_pURBReadHandlesReg = GetNewVariable (
305
305
numLanes (m_SIMDSize) * ( m_pNumURBReadHandleGRF ),
306
- ISA_TYPE_UD,
306
+ ISA_TYPE_UD,
307
307
EALIGN_GRF);
308
308
}
309
309
return m_pURBReadHandlesReg;
@@ -362,18 +362,18 @@ CVariable* CHullShader::GetURBInputHandle(CVariable* pVertexIndex)
362
362
}
363
363
}
364
364
365
- QuadEltUnit CHullShader::GetFinalGlobalOffet (QuadEltUnit globalOffset)
366
- {
365
+ QuadEltUnit CHullShader::GetFinalGlobalOffet (QuadEltUnit globalOffset)
366
+ {
367
367
return globalOffset;
368
368
}
369
369
370
370
uint32_t CHullShader::GetMaxNumOfPushedInputs () const
371
- {
371
+ {
372
372
uint numberOfPatches = (m_properties.m_pShaderDispatchMode == EIGHT_PATCH_DISPATCH_MODE) ? 8 : 1 ;
373
373
374
- // Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
374
+ // Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
375
375
// in current dispatch mode for current topology ( InputPatch size ).
376
- uint32_t maxNumOfPushedInputAttributesPerICP =
376
+ uint32_t maxNumOfPushedInputAttributesPerICP =
377
377
m_pMaxNumOfPushedInputs / (m_properties.m_pInputControlPointCount *numberOfPatches);
378
378
379
379
// Input attributes can be pushed only in pairs, so we need to round down the limit.
@@ -383,28 +383,28 @@ uint32_t CHullShader::GetMaxNumOfPushedInputs() const
383
383
// They can be pushed only in pairs.
384
384
uint32_t reqNumOfInputAttributesPerICP = iSTD::Align (m_properties.m_pMaxInputSignatureCount , 2 );
385
385
386
- // TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
386
+ // TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
387
387
// in case SGV inputs have to be taken into consideration (will be done in next step).
388
388
// reqNumOfInputAttributes += HeaderSize().Count();
389
389
390
390
// Determine ACTUAL number of attributes that can be pushed.
391
391
// If the required number of input attributes is less than the maximum potential number,
392
392
// then all of them will be pushed.
393
- uint32_t actualNumOfPushedInputAttributesPerICP =
393
+ uint32_t actualNumOfPushedInputAttributesPerICP =
394
394
iSTD::Min (reqNumOfInputAttributesPerICP, maxNumOfPushedInputAttributesPerICP);
395
395
396
396
return actualNumOfPushedInputAttributesPerICP;
397
397
}
398
398
399
- void CHullShader::EmitPatchConstantInput (llvm::Instruction* pInst, CVariable* pDest )
399
+ void CHullShader::EmitPatchConstantInput (llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar )
400
400
{
401
401
bool readHeader = ((dyn_cast<GenIntrinsicInst>(pInst))->getIntrinsicID () == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead);
402
402
403
403
// patch constant input read
404
404
llvm::Value* pIndirectVertexIdx = pInst->getOperand (0 );
405
405
406
- CVariable* pPerSlotOffsetVar = nullptr ;
407
- QuadEltUnit attributeOffset (0 );
406
+ pPerSlotOffsetVar = nullptr ;
407
+ attributeOffset = QuadEltUnit (0 );
408
408
409
409
// {BDW - WA, HS} Do not set pPerSlotOffset or change globalOffset to read TessFactors from URB.
410
410
if (!readHeader)
@@ -422,25 +422,23 @@ void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pD
422
422
423
423
attributeOffset = attributeOffset + GetURBHeaderSize ();
424
424
}
425
-
426
- URBReadPatchConstOrOutputCntrlPtInput (pPerSlotOffsetVar, attributeOffset, false , pDest);
427
425
}
428
426
429
- void CHullShader::EmitOutputControlPointInput (llvm::Instruction* pInst, CVariable* pDest )
427
+ void CHullShader::EmitOutputControlPointInput (llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar )
430
428
{
431
429
// output control point input read
432
430
llvm::Value* pIndirectVertexIdx = pInst->getOperand (0 );
433
431
llvm::Value* pAttribIdx = pInst->getOperand (1 );
434
432
435
- CVariable* pPerSlotOffsetVar = nullptr ;
436
- QuadEltUnit attributeOffset ( GetPatchConstantOutputSize () );
433
+ pPerSlotOffsetVar = nullptr ;
434
+ attributeOffset = GetPatchConstantOutputSize ();
437
435
438
436
// Compute offset from vertex index
439
437
if (llvm::ConstantInt* pConstVertexIdx = llvm::dyn_cast<llvm::ConstantInt>(pIndirectVertexIdx))
440
438
{
441
439
// vertex index is a constant, we can compute the URB read offset directly
442
- attributeOffset =
443
- attributeOffset +
440
+ attributeOffset =
441
+ attributeOffset +
444
442
QuadEltUnit (int_cast<unsigned int >(pConstVertexIdx->getZExtValue ())) * m_properties.m_pMaxOutputSignatureCount ;
445
443
}
446
444
else
@@ -459,7 +457,7 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
459
457
}
460
458
}
461
459
462
- // Compute additionall offset coming from atribute index
460
+ // Compute additional offset coming from attribute index
463
461
if (llvm::ConstantInt* pConstAttribIdx = llvm::dyn_cast<llvm::ConstantInt>(pAttribIdx))
464
462
{
465
463
// attribute offset is a constant, we can compute the URB read offset directly
@@ -482,53 +480,10 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
482
480
pPerSlotOffsetVar = GetSymbol (pAttribIdx);
483
481
}
484
482
}
485
-
486
- URBReadPatchConstOrOutputCntrlPtInput (pPerSlotOffsetVar, attributeOffset, false , pDest);
487
- }
488
-
489
- void CHullShader::URBReadPatchConstOrOutputCntrlPtInput (
490
- CVariable* pPerSlotOffsetVar,
491
- QuadEltUnit globalOffset,
492
- bool EOT,
493
- CVariable* pDest )
494
- {
495
- CEncoder& encoder = GetEncoder ();
496
-
497
- const bool hasPerSlotOffsets = pPerSlotOffsetVar != nullptr ;
498
- // Payload size is just URB handles (1 GRF) or URB handles and per-slot offsets (2 GRFs).
499
- const Unit<Element> payloadSize (hasPerSlotOffsets ? 2 : 1 );
500
- CVariable* pPayload =
501
- GetNewVariable (payloadSize.Count () * numLanes (m_SIMDSize), ISA_TYPE_UD, EALIGN_GRF);
502
-
503
- // get the register with URBHandles
504
- CopyVariable (pPayload, m_pURBWriteHandleReg);
505
-
506
- // If we have runtime value in per-slot offsets, we need to copy per-slot offsets to payload
507
- if (hasPerSlotOffsets)
508
- {
509
- CopyVariable (pPayload, pPerSlotOffsetVar, 1 );
510
- }
511
-
512
- const Unit<Element> messageLength = payloadSize;
513
- const Unit<Element> responseLength (pDest->GetNumberElement ()/numLanes (m_SIMDSize));
514
- const uint desc = UrbMessage (
515
- messageLength.Count (),
516
- responseLength.Count (),
517
- EOT,
518
- hasPerSlotOffsets,
519
- false ,
520
- globalOffset.Count (),
521
- EU_GEN8_URB_OPCODE_SIMD8_READ);
522
-
523
- const uint exDesc = EU_MESSAGE_TARGET_URB | (EOT ? 1 << 5 : 0 );
524
- CVariable* pMessDesc = ImmToVariable (desc, ISA_TYPE_UD);
525
-
526
- encoder.Send (pDest, pPayload, exDesc, pMessDesc);
527
- encoder.Push ();
528
483
}
529
484
530
485
// / Returns the size of the output vertex.
531
- // / Unit: 16B = 4 DWORDs
486
+ // / Unit: 16B = 4 DWORDs
532
487
// / Note: The PatchConstantOutput size must be 32B-aligned when rendering is enabled
533
488
// / Therefore, the PatchConstantOutput size is also rounded up to a multiple of 2.
534
489
QuadEltUnit CHullShader::GetPatchConstantOutputSize () const
0 commit comments