Skip to content

Commit a9ebee6

Browse files
Finished a pure CPU-draw vs GPU-indirect-draw without culling comparison
1 parent b41017b commit a9ebee6

File tree

6 files changed

+129
-31
lines changed

6 files changed

+129
-31
lines changed

examples_tests/26.MultidrawIndirectVSCPUCull/MultidrawIndirectVSCPUCull.cbp

+2-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@
8585
</Compiler>
8686
<Unit filename="main.cpp" />
8787
<Unit filename="mesh.frag" />
88-
<Unit filename="mesh.vert" />
88+
<Unit filename="meshCPU.vert" />
89+
<Unit filename="meshGPU.vert" />
8990
<Extensions>
9091
<code_completion />
9192
<debugger />
Binary file not shown.
Loading

examples_tests/26.MultidrawIndirectVSCPUCull/main.cpp

+96-30
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ using namespace core;
1111

1212
bool quit = false;
1313

14-
bool doGPUCulling = false;
14+
bool doCulling = false;
15+
bool useDrawIndirect = false;
1516

1617
//!Same As Last Example
1718
class MyEventReceiver : public IEventReceiver
@@ -31,8 +32,11 @@ class MyEventReceiver : public IEventReceiver
3132
case irr::KEY_KEY_Q: // so we can quit
3233
quit = true;
3334
return true;
35+
case irr::KEY_KEY_C: // reserved for toggling culling (toggle is still disabled below)
36+
///doCulling = !doCulling; // Not enabled/necessary yet
37+
return true;
3438
case irr::KEY_SPACE: // toggle between CPU per-object draws and GPU indirect draw
35-
doGPUCulling = !doGPUCulling;
39+
useDrawIndirect = !useDrawIndirect;
3640
return true;
3741
default:
3842
break;
@@ -86,6 +90,15 @@ struct ObjectData_t
8690
float padding[3];
8791
};
8892

93+
//
94+
//! One record of an indexed indirect draw, laid out as OpenGL reads it from an indirect buffer.
/** An array of these is uploaded verbatim as a GPU buffer and consumed with a stride of
    sizeof(DrawElementsIndirectCommand), so the struct must remain five tightly packed
    32-bit words in exactly this order. */
struct DrawElementsIndirectCommand
{
    uint32_t count;         //!< number of indices to draw
    uint32_t instanceCount; //!< number of instances to draw
    uint32_t firstIndex;    //!< offset into the index buffer, measured in indices (not bytes)
    int32_t  baseVertex;    //!< value added to every fetched index; signed in the OpenGL layout
    uint32_t baseInstance;  //!< first instance for instanced vertex attributes
};
static_assert(sizeof(DrawElementsIndirectCommand)==5u*sizeof(uint32_t),
              "DrawElementsIndirectCommand must stay tightly packed for use as a GPU indirect buffer");
89102

90103
int main()
91104
{
@@ -109,23 +122,24 @@ int main()
109122
video::IVideoDriver* driver = device->getVideoDriver();
110123

111124
SimpleCallBack* cb = new SimpleCallBack();
112-
video::E_MATERIAL_TYPE litSolidMaterialType = (video::E_MATERIAL_TYPE)driver->getGPUProgrammingServices()->addHighLevelShaderMaterialFromFiles("../mesh.vert",
125+
video::E_MATERIAL_TYPE cpuCullMaterial = (video::E_MATERIAL_TYPE)driver->getGPUProgrammingServices()->addHighLevelShaderMaterialFromFiles("../meshCPU.vert",
113126
"","","", //! No Geometry or Tessellation Shaders
114127
"../mesh.frag",
115128
3,video::EMT_SOLID,
116129
cb);
117130
cb->drop();
118131

119-
video::SMaterial material;
120-
material.setTexture(0,driver->getTexture("../../media/wall.jpg"));
121-
material.MaterialType = litSolidMaterialType;
132+
video::E_MATERIAL_TYPE gpuCullMaterial = (video::E_MATERIAL_TYPE)driver->getGPUProgrammingServices()->addHighLevelShaderMaterialFromFiles("../meshGPU.vert",
133+
"","","", //! No Geometry or Tessellation Shaders
134+
"../mesh.frag",
135+
3,video::EMT_SOLID);
122136

123137

124138
scene::ISceneManager* smgr = device->getSceneManager();
125139

126-
#define kInstanceCount 2048
127-
#define kTotalTriangleLimit (32*1024*1024)
128-
#define kMinTriangleLimit 1024
140+
#define kInstanceCount 4096
141+
#define kTotalTriangleLimit (64*1024*1024)
142+
#define kMinTriangleLimit 64
129143

130144
scene::ICameraSceneNode* camera =
131145
smgr->addCameraSceneNodeFPS(0,100.0f,0.01f);
@@ -151,25 +165,26 @@ int main()
151165
cacheFile->drop();
152166

153167
//make sure its still ok
154-
std::sort(normalCacheFor2_10_10_10Quant.begin(),normalCacheFor2_10_10_10Quant.end());
168+
std::sort(scene::normalCacheFor2_10_10_10Quant.begin(),scene::normalCacheFor2_10_10_10Quant.end());
155169
}
156170
}
157171

158172
core::matrix4x3 instanceXForm[kInstanceCount];
159173
scene::IGPUMeshBuffer* mbuff[kInstanceCount] = {NULL};
174+
video::IGPUBuffer* indirectDrawBuffer = NULL;
160175

176+
scene::IGPUMeshDataFormatDesc* vaospec = driver->createGPUMeshDataFormatDesc();
161177
{
162178
scene::ICPUMesh* cpumesh[kInstanceCount];
163179

164180
size_t vertexSize = 0;
165181
std::vector<uint8_t> vertexData;
166182
std::vector<uint32_t> indexData;
167-
scene::IGPUMeshDataFormatDesc* vaospec = driver->createGPUMeshDataFormatDesc();
168183

169184
std::random_device rd;
170185
std::mt19937 mt(rd());
171186
//std::uniform_int_distribution<uint32_t> dist(kMinTriangleLimit, kTotalTriangleLimit*2/kInstanceCount-kMinTriangleLimit);
172-
std::uniform_int_distribution<uint32_t> dist(kMinTriangleLimit, kMinTriangleLimit*16);
187+
std::uniform_int_distribution<uint32_t> dist(kMinTriangleLimit, kMinTriangleLimit*18);
173188
for (size_t i=0; i<kInstanceCount; i++)
174189
{
175190
float poly = sqrtf(dist(mt))+0.5f;
@@ -231,12 +246,16 @@ int main()
231246
video::IGPUBuffer* vxbuf = driver->createGPUBuffer(vertexData.size(),vertexData.data());
232247
vertexData.clear();
233248

249+
250+
DrawElementsIndirectCommand indirectDrawData[kInstanceCount];
251+
234252
uint32_t baseVertex = 0;
235253
uint32_t indexOffset = 0;
236-
std::uniform_real_distribution<float> dist3D(0.f,100.f);
254+
std::uniform_real_distribution<float> dist3D(0.f,400.f);
237255
for (size_t i=0; i<kInstanceCount; i++)
238256
{
239-
scene::IMeshDataFormatDesc<core::ICPUBuffer>* format = cpumesh[i]->getMeshBuffer(0)->getMeshDataAndFormat();
257+
scene::ICPUMeshBuffer* mbuf = cpumesh[i]->getMeshBuffer(0);
258+
scene::IMeshDataFormatDesc<core::ICPUBuffer>* format = mbuf->getMeshDataAndFormat();
240259
if (i==0)
241260
{
242261
for (size_t j=0; j<scene::EVAI_COUNT; j++)
@@ -251,31 +270,36 @@ int main()
251270
}
252271
}
253272

273+
indirectDrawData[i].count = mbuf->getIndexCount();
274+
indirectDrawData[i].instanceCount = 1;
275+
indirectDrawData[i].firstIndex = indexOffset/sizeof(uint32_t);
276+
indirectDrawData[i].baseVertex = baseVertex;
277+
indirectDrawData[i].baseInstance = 0;
278+
279+
254280
mbuff[i] = new scene::IGPUMeshBuffer();
255281
mbuff[i]->setBaseVertex(baseVertex);
256282
baseVertex += format->getMappedBuffer(scene::EVAI_ATTR0)->getSize()/vertexSize;
257283

258284
mbuff[i]->setBoundingBox(cpumesh[i]->getBoundingBox());
259285

260286
mbuff[i]->setIndexBufferOffset(indexOffset);
261-
indexOffset += cpumesh[i]->getMeshBuffer(0)->getIndexCount()*sizeof(uint32_t);
287+
indexOffset += mbuf->getIndexCount()*sizeof(uint32_t);
262288

263-
mbuff[i]->setIndexCount(cpumesh[i]->getMeshBuffer(0)->getIndexCount());
289+
mbuff[i]->setIndexCount(mbuf->getIndexCount());
264290
mbuff[i]->setIndexType(video::EIT_32BIT);
265291
mbuff[i]->setMeshDataAndFormat(vaospec);
266292
mbuff[i]->setPrimitiveType(scene::EPT_TRIANGLES);
267293

268294
cpumesh[i]->drop();
269295

270296

271-
instanceXForm[i].setScale(dist3D(mt)*0.01f+1.f);
297+
instanceXForm[i].setScale(dist3D(mt)*0.0025f+1.f);
272298
instanceXForm[i].setTranslation(core::vector3df(dist3D(mt),dist3D(mt),dist3D(mt)));
273299
}
274300
vxbuf->drop();
275301

276-
//
277-
278-
vaospec->drop();
302+
indirectDrawBuffer = driver->createGPUBuffer(sizeof(indirectDrawData),indirectDrawData);
279303
}
280304

281305
ObjectData_t perObjectData[kInstanceCount];
@@ -292,19 +316,59 @@ int main()
292316
//! Draw the view
293317
smgr->drawAll();
294318

295-
for (size_t i=0; i<kInstanceCount; i++)
319+
if (useDrawIndirect)
296320
{
297-
perObjectData[i].modelViewProjMatrix = core::concatenateBFollowedByA(driver->getTransform(video::EPTS_PROJ_VIEW),instanceXForm[i]);
298-
instanceXForm[i].getSub3x3InverseTranspose(perObjectData[i].normalMat);
321+
if (doCulling)
322+
{
323+
//make sure results are visible
324+
video::COpenGLExtensionHandler::extGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
325+
}
326+
else
327+
{
328+
//do compute shader to produce indirect draw buffer (and cull)
329+
for (size_t i=0; i<kInstanceCount; i++)
330+
{
331+
perObjectData[i].modelViewProjMatrix = core::concatenateBFollowedByA(driver->getTransform(video::EPTS_PROJ_VIEW),instanceXForm[i]);
332+
instanceXForm[i].getSub3x3InverseTranspose(perObjectData[i].normalMat);
333+
}
334+
perObjectSSBO->updateSubRange(0,sizeof(perObjectData),perObjectData);
335+
}
336+
337+
//fire it off
338+
video::COpenGLExtensionHandler::extGlBindBuffersBase(GL_SHADER_STORAGE_BUFFER,0,1,
339+
&static_cast<video::COpenGLBuffer*>(perObjectSSBO)->getOpenGLName());
340+
341+
video::SMaterial material;
342+
material.MaterialType = gpuCullMaterial;
343+
driver->setMaterial(material);
344+
driver->drawIndexedIndirect(vaospec,scene::EPT_TRIANGLES,video::EIT_32BIT,indirectDrawBuffer,0,kInstanceCount,sizeof(DrawElementsIndirectCommand));
299345
}
300-
perObjectSSBO->updateSubRange(0,sizeof(perObjectData),perObjectData);
301-
video::COpenGLExtensionHandler::extGlBindBuffersBase(GL_SHADER_STORAGE_BUFFER,0,1,
302-
&static_cast<video::COpenGLBuffer*>(perObjectSSBO)->getOpenGLName());
303-
for (size_t i=0; i<kInstanceCount; i++)
346+
else
304347
{
305-
reinterpret_cast<uint32_t&>(material.userData) = i;
306-
driver->setMaterial(material);
307-
driver->drawMeshBuffer(mbuff[i]);
348+
scene::IGPUMeshBuffer* mb2draw[kInstanceCount];
349+
350+
size_t unculledNum = 0;
351+
for (size_t i=0; i<kInstanceCount; i++)
352+
{
353+
if (doCulling)
354+
continue;
355+
356+
mb2draw[unculledNum] = mbuff[i];
357+
perObjectData[unculledNum].modelViewProjMatrix = core::concatenateBFollowedByA(driver->getTransform(video::EPTS_PROJ_VIEW),instanceXForm[i]);
358+
instanceXForm[i].getSub3x3InverseTranspose(perObjectData[unculledNum].normalMat);
359+
unculledNum++;
360+
}
361+
perObjectSSBO->updateSubRange(0,unculledNum*sizeof(ObjectData_t),perObjectData);
362+
video::COpenGLExtensionHandler::extGlBindBuffersBase(GL_SHADER_STORAGE_BUFFER,0,1,
363+
&static_cast<video::COpenGLBuffer*>(perObjectSSBO)->getOpenGLName());
364+
for (size_t i=0; i<unculledNum; i++)
365+
{
366+
video::SMaterial material;
367+
material.MaterialType = cpuCullMaterial;
368+
reinterpret_cast<uint32_t&>(material.userData) = i;
369+
driver->setMaterial(material);
370+
driver->drawMeshBuffer(mb2draw[i]);
371+
}
308372
}
309373
video::COpenGLExtensionHandler::extGlBindBuffersBase(GL_SHADER_STORAGE_BUFFER,0,1,NULL);
310374

@@ -322,6 +386,8 @@ int main()
322386
}
323387
}
324388
perObjectSSBO->drop();
389+
indirectDrawBuffer->drop();
390+
vaospec->drop();
325391

326392
//create a screenshot
327393
video::IImage* screenshot = driver->createImage(video::ECF_A8R8G8B8,params.WindowSize);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#version 430 core

// gl_DrawIDARB is provided by this extension. GLSL extension names carry the "GL_" prefix;
// without it a strict compiler does not recognise the extension.
#extension GL_ARB_shader_draw_parameters : require

// Per-draw transform data, one element per draw of the multi-draw call.
// NOTE(review): the mat3 member's std430 layout must match the CPU-side struct - confirm.
struct ModelData_t
{
    mat4 MVP;
    mat3 normalMat;
};

// Storage buffer at binding point 0 holding one ModelData_t per draw.
layout(std430, binding = 0) buffer PerObject
{
    ModelData_t modelData[];
};


layout(location = 0 ) in vec3 vPos; //only a 3d position is passed from irrlicht, but last (the W) coordinate gets filled with default 1.0
layout(location = 3 ) in vec3 vNormal;

out vec4 Color; //per vertex output color, will be interpolated across the triangle
flat out vec3 Normal;

void main()
{
    // Index of the current draw within the multi-draw call; selects this draw's transforms.
    uint drawID = uint(gl_DrawIDARB);

    // Equivalent to MVP*vec4(vPos,1.0), written out column by column.
    gl_Position = modelData[drawID].MVP[0]*vPos.x+modelData[drawID].MVP[1]*vPos.y+modelData[drawID].MVP[2]*vPos.z+modelData[drawID].MVP[3];
    Color = vec4(0.4,0.4,1.0,1.0);
    Normal = normalize(modelData[drawID].normalMat*vNormal); //have to normalize twice because of normal quantization
}
31+

0 commit comments

Comments
 (0)