@@ -11,7 +11,8 @@ using namespace core;
11
11
12
12
bool quit = false ; // set to true by the event receiver when Q is pressed
13
13
14
- bool doGPUCulling = false ;
14
+ bool doCulling = false ; // read by the draw code; its key toggle (C) is currently commented out, so it stays false
15
+ bool useDrawIndirect = false ; // toggled with SPACE: selects the drawIndexedIndirect path instead of one drawMeshBuffer call per mesh
15
16
16
17
// !Same As Last Example
17
18
class MyEventReceiver : public IEventReceiver
@@ -31,8 +32,11 @@ class MyEventReceiver : public IEventReceiver
31
32
case irr::KEY_KEY_Q: // so we can quit
32
33
quit = true ;
33
34
return true ;
35
+ case irr::KEY_KEY_C: // reserved for toggling culling (the toggle below is still commented out)
36
+ // /doCulling = !doCulling; // Not enabled/necessary yet
37
+ return true ;
34
38
case irr::KEY_SPACE: // toggle between indirect and per-mesh drawing
35
- doGPUCulling = !doGPUCulling ;
39
+ useDrawIndirect = !useDrawIndirect ;
36
40
return true ;
37
41
default :
38
42
break ;
@@ -86,6 +90,15 @@ struct ObjectData_t
86
90
float padding[3 ];
87
91
};
88
92
93
+ // One indirect draw record per mesh: an array of these is uploaded to a GPU buffer and handed to drawIndexedIndirect, with sizeof(DrawElementsIndirectCommand) as the stride. NOTE(review): field order presumably has to match OpenGL's DrawElementsIndirectCommand layout — confirm.
94
+ struct DrawElementsIndirectCommand
95
+ {
96
+ uint32_t count; // number of indices to draw (filled from the mesh buffer's index count)
97
+ uint32_t instanceCount; // filled with 1 for every record
98
+ uint32_t firstIndex; // offset into the shared index buffer, in indices (byte offset / sizeof(uint32_t))
99
+ uint32_t baseVertex; // running vertex offset of this mesh inside the shared vertex buffer
100
+ uint32_t baseInstance; // filled with 0 for every record
101
+ };
89
102
90
103
int main ()
91
104
{
@@ -109,23 +122,24 @@ int main()
109
122
video::IVideoDriver* driver = device->getVideoDriver ();
110
123
111
124
SimpleCallBack* cb = new SimpleCallBack ();
112
- video::E_MATERIAL_TYPE litSolidMaterialType = (video::E_MATERIAL_TYPE)driver->getGPUProgrammingServices ()->addHighLevelShaderMaterialFromFiles (" ../mesh .vert" ,
125
+ video::E_MATERIAL_TYPE cpuCullMaterial = (video::E_MATERIAL_TYPE)driver->getGPUProgrammingServices ()->addHighLevelShaderMaterialFromFiles (" ../meshCPU .vert" ,
113
126
" " ," " ," " , // ! No Geometry or Tessellation Shaders
114
127
" ../mesh.frag" ,
115
128
3 ,video::EMT_SOLID,
116
129
cb);
117
130
cb->drop ();
118
131
119
- video::SMaterial material;
120
- material.setTexture (0 ,driver->getTexture (" ../../media/wall.jpg" ));
121
- material.MaterialType = litSolidMaterialType;
132
+ video::E_MATERIAL_TYPE gpuCullMaterial = (video::E_MATERIAL_TYPE)driver->getGPUProgrammingServices ()->addHighLevelShaderMaterialFromFiles (" ../meshGPU.vert" ,
133
+ " " ," " ," " , // ! No Geometry or Tessellation Shaders
134
+ " ../mesh.frag" ,
135
+ 3 ,video::EMT_SOLID);
122
136
123
137
124
138
scene::ISceneManager* smgr = device->getSceneManager ();
125
139
126
- #define kInstanceCount 2048
127
- #define kTotalTriangleLimit (32 *1024 *1024 )
128
- #define kMinTriangleLimit 1024
140
+ #define kInstanceCount 4096
141
+ #define kTotalTriangleLimit (64 *1024 *1024 )
142
+ #define kMinTriangleLimit 64
129
143
130
144
scene::ICameraSceneNode* camera =
131
145
smgr->addCameraSceneNodeFPS (0 ,100 .0f ,0 .01f );
@@ -151,25 +165,26 @@ int main()
151
165
cacheFile->drop ();
152
166
153
167
// make sure its still ok
154
- std::sort (normalCacheFor2_10_10_10Quant.begin (),normalCacheFor2_10_10_10Quant.end ());
168
+ std::sort (scene:: normalCacheFor2_10_10_10Quant.begin (),scene:: normalCacheFor2_10_10_10Quant.end ());
155
169
}
156
170
}
157
171
158
172
core::matrix4x3 instanceXForm[kInstanceCount ];
159
173
scene::IGPUMeshBuffer* mbuff[kInstanceCount ] = {NULL };
174
+ video::IGPUBuffer* indirectDrawBuffer = NULL ;
160
175
176
+ scene::IGPUMeshDataFormatDesc* vaospec = driver->createGPUMeshDataFormatDesc ();
161
177
{
162
178
scene::ICPUMesh* cpumesh[kInstanceCount ];
163
179
164
180
size_t vertexSize = 0 ;
165
181
std::vector<uint8_t > vertexData;
166
182
std::vector<uint32_t > indexData;
167
- scene::IGPUMeshDataFormatDesc* vaospec = driver->createGPUMeshDataFormatDesc ();
168
183
169
184
std::random_device rd;
170
185
std::mt19937 mt (rd ());
171
186
// std::uniform_int_distribution<uint32_t> dist(kMinTriangleLimit, kTotalTriangleLimit*2/kInstanceCount-kMinTriangleLimit);
172
- std::uniform_int_distribution<uint32_t > dist (kMinTriangleLimit , kMinTriangleLimit *16 );
187
+ std::uniform_int_distribution<uint32_t > dist (kMinTriangleLimit , kMinTriangleLimit *18 );
173
188
for (size_t i=0 ; i<kInstanceCount ; i++)
174
189
{
175
190
float poly = sqrtf (dist (mt))+0 .5f ;
@@ -231,12 +246,16 @@ int main()
231
246
video::IGPUBuffer* vxbuf = driver->createGPUBuffer (vertexData.size (),vertexData.data ());
232
247
vertexData.clear ();
233
248
249
+
250
+ DrawElementsIndirectCommand indirectDrawData[kInstanceCount ];
251
+
234
252
uint32_t baseVertex = 0 ;
235
253
uint32_t indexOffset = 0 ;
236
- std::uniform_real_distribution<float > dist3D (0 .f ,100 .f );
254
+ std::uniform_real_distribution<float > dist3D (0 .f ,400 .f );
237
255
for (size_t i=0 ; i<kInstanceCount ; i++)
238
256
{
239
- scene::IMeshDataFormatDesc<core::ICPUBuffer>* format = cpumesh[i]->getMeshBuffer (0 )->getMeshDataAndFormat ();
257
+ scene::ICPUMeshBuffer* mbuf = cpumesh[i]->getMeshBuffer (0 );
258
+ scene::IMeshDataFormatDesc<core::ICPUBuffer>* format = mbuf->getMeshDataAndFormat ();
240
259
if (i==0 )
241
260
{
242
261
for (size_t j=0 ; j<scene::EVAI_COUNT; j++)
@@ -251,31 +270,36 @@ int main()
251
270
}
252
271
}
253
272
273
+ indirectDrawData[i].count = mbuf->getIndexCount ();
274
+ indirectDrawData[i].instanceCount = 1 ;
275
+ indirectDrawData[i].firstIndex = indexOffset/sizeof (uint32_t );
276
+ indirectDrawData[i].baseVertex = baseVertex;
277
+ indirectDrawData[i].baseInstance = 0 ;
278
+
279
+
254
280
mbuff[i] = new scene::IGPUMeshBuffer ();
255
281
mbuff[i]->setBaseVertex (baseVertex);
256
282
baseVertex += format->getMappedBuffer (scene::EVAI_ATTR0)->getSize ()/vertexSize;
257
283
258
284
mbuff[i]->setBoundingBox (cpumesh[i]->getBoundingBox ());
259
285
260
286
mbuff[i]->setIndexBufferOffset (indexOffset);
261
- indexOffset += cpumesh[i]-> getMeshBuffer ( 0 ) ->getIndexCount ()*sizeof (uint32_t );
287
+ indexOffset += mbuf ->getIndexCount ()*sizeof (uint32_t );
262
288
263
- mbuff[i]->setIndexCount (cpumesh[i]-> getMeshBuffer ( 0 ) ->getIndexCount ());
289
+ mbuff[i]->setIndexCount (mbuf ->getIndexCount ());
264
290
mbuff[i]->setIndexType (video::EIT_32BIT);
265
291
mbuff[i]->setMeshDataAndFormat (vaospec);
266
292
mbuff[i]->setPrimitiveType (scene::EPT_TRIANGLES);
267
293
268
294
cpumesh[i]->drop ();
269
295
270
296
271
- instanceXForm[i].setScale (dist3D (mt)*0 .01f +1 .f );
297
+ instanceXForm[i].setScale (dist3D (mt)*0 .0025f +1 .f );
272
298
instanceXForm[i].setTranslation (core::vector3df (dist3D (mt),dist3D (mt),dist3D (mt)));
273
299
}
274
300
vxbuf->drop ();
275
301
276
- //
277
-
278
- vaospec->drop ();
302
+ indirectDrawBuffer = driver->createGPUBuffer (sizeof (indirectDrawData),indirectDrawData);
279
303
}
280
304
281
305
ObjectData_t perObjectData[kInstanceCount ];
@@ -292,19 +316,59 @@ int main()
292
316
// ! Draw the view
293
317
smgr->drawAll ();
294
318
295
- for ( size_t i= 0 ; i< kInstanceCount ; i++ )
319
+ if (useDrawIndirect )
296
320
{
297
- perObjectData[i].modelViewProjMatrix = core::concatenateBFollowedByA (driver->getTransform (video::EPTS_PROJ_VIEW),instanceXForm[i]);
298
- instanceXForm[i].getSub3x3InverseTranspose (perObjectData[i].normalMat );
321
+ if (doCulling)
322
+ {
323
+ // make sure results are visible
324
+ video::COpenGLExtensionHandler::extGlMemoryBarrier (GL_SHADER_STORAGE_BARRIER_BIT);
325
+ }
326
+ else
327
+ {
328
+ // TODO: do compute shader to produce indirect draw buffer (and cull); for now the per-object data is filled on the CPU and uploaded
329
+ for (size_t i=0 ; i<kInstanceCount ; i++)
330
+ {
331
+ perObjectData[i].modelViewProjMatrix = core::concatenateBFollowedByA (driver->getTransform (video::EPTS_PROJ_VIEW),instanceXForm[i]);
332
+ instanceXForm[i].getSub3x3InverseTranspose (perObjectData[i].normalMat );
333
+ }
334
+ perObjectSSBO->updateSubRange (0 ,sizeof (perObjectData),perObjectData);
335
+ }
336
+
337
+ // fire it off
338
+ video::COpenGLExtensionHandler::extGlBindBuffersBase (GL_SHADER_STORAGE_BUFFER,0 ,1 ,
339
+ &static_cast <video::COpenGLBuffer*>(perObjectSSBO)->getOpenGLName ());
340
+
341
+ video::SMaterial material;
342
+ material.MaterialType = gpuCullMaterial;
343
+ driver->setMaterial (material);
344
+ driver->drawIndexedIndirect (vaospec,scene::EPT_TRIANGLES,video::EIT_32BIT,indirectDrawBuffer,0 ,kInstanceCount ,sizeof (DrawElementsIndirectCommand));
299
345
}
300
- perObjectSSBO->updateSubRange (0 ,sizeof (perObjectData),perObjectData);
301
- video::COpenGLExtensionHandler::extGlBindBuffersBase (GL_SHADER_STORAGE_BUFFER,0 ,1 ,
302
- &static_cast <video::COpenGLBuffer*>(perObjectSSBO)->getOpenGLName ());
303
- for (size_t i=0 ; i<kInstanceCount ; i++)
346
+ else
304
347
{
305
- reinterpret_cast <uint32_t &>(material.userData ) = i;
306
- driver->setMaterial (material);
307
- driver->drawMeshBuffer (mbuff[i]);
348
+ scene::IGPUMeshBuffer* mb2draw[kInstanceCount ];
349
+
350
+ size_t unculledNum = 0 ;
351
+ for (size_t i=0 ; i<kInstanceCount ; i++)
352
+ {
353
+ if (doCulling)
354
+ continue ;
355
+
356
+ mb2draw[unculledNum] = mbuff[i];
357
+ perObjectData[unculledNum].modelViewProjMatrix = core::concatenateBFollowedByA (driver->getTransform (video::EPTS_PROJ_VIEW),instanceXForm[i]);
358
+ instanceXForm[i].getSub3x3InverseTranspose (perObjectData[unculledNum].normalMat );
359
+ unculledNum++;
360
+ }
361
+ perObjectSSBO->updateSubRange (0 ,unculledNum*sizeof (ObjectData_t),perObjectData);
362
+ video::COpenGLExtensionHandler::extGlBindBuffersBase (GL_SHADER_STORAGE_BUFFER,0 ,1 ,
363
+ &static_cast <video::COpenGLBuffer*>(perObjectSSBO)->getOpenGLName ());
364
+ for (size_t i=0 ; i<unculledNum; i++)
365
+ {
366
+ video::SMaterial material;
367
+ material.MaterialType = cpuCullMaterial;
368
+ reinterpret_cast <uint32_t &>(material.userData ) = i;
369
+ driver->setMaterial (material);
370
+ driver->drawMeshBuffer (mb2draw[i]);
371
+ }
308
372
}
309
373
video::COpenGLExtensionHandler::extGlBindBuffersBase (GL_SHADER_STORAGE_BUFFER,0 ,1 ,NULL );
310
374
@@ -322,6 +386,8 @@ int main()
322
386
}
323
387
}
324
388
perObjectSSBO->drop ();
389
+ indirectDrawBuffer->drop ();
390
+ vaospec->drop ();
325
391
326
392
// create a screenshot
327
393
video::IImage* screenshot = driver->createImage (video::ECF_A8R8G8B8,params.WindowSize );
0 commit comments