Skip to content

Commit 67ee973

Browse files
Merge pull request #293 from Devsh-Graphics-Programming/material_compiler_point_light_fixes
Material compiler point light fixes
2 parents 4168812 + 994e388 commit 67ee973

36 files changed

+1059
-1197
lines changed

.gitmodules

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
url = https://github.com/greg7mdp/parallel-hashmap
8888
[submodule "3rdparty/radeonrays"]
8989
path = 3rdparty/radeonrays
90-
url = https://github.com/devshgraphicsprogramming/RadeonRays_SDK.git
90+
url = git@github.com:devshgraphicsprogramming/RadeonRays_SDK.git
9191
branch = baw
9292
[submodule "3rdparty/jitify"]
9393
path = 3rdparty/jitify

examples_tests/18.MitsubaLoader/main.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,11 @@ vec3 nbl_computeLighting(inout nbl_glsl_AnisotropicViewSurfaceInteraction out_in
7474
nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
7575
7676
vec3 rand = rand3d(i,scramble_state);
77-
float pdf;
7877
nbl_glsl_LightSample s;
79-
vec3 rem = nbl_glsl_MC_runGenerateAndRemainderStream(precomp, gcs, rnps, rand, pdf, s);
78+
nbl_glsl_MC_quot_pdf_aov_t rem = nbl_glsl_MC_runGenerateAndRemainderStream(precomp, gcs, rnps, rand, s);
8079
8180
vec2 uv = SampleSphericalMap(s.L);
82-
color += rem*textureLod(envMap, uv, 0.0).xyz;
81+
color += rem.quotient*textureLod(envMap, uv, 0.0).xyz;
8382
}
8483
color /= float(SAMPLE_COUNT);
8584
#endif

examples_tests/22.RaytracedAO/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more d
106106
| HOME | Press to reset the camera to the initial view. (Useful when you're lost and you want to go back to where you started) |
107107
| P | Press to take a snapshot when moving around (will be denoised) |
108108
| L | Press to log the current progress percentage and samples rendered. |
109+
| B | Toggle between Path Tracing and Albedo preview; this lets you position the camera more responsively in complex scenes. |
109110

110111
## Denoiser Hook
111112
`denoiser_hook.bat` is a script that you can call to denoise your rendered images.

examples_tests/22.RaytracedAO/Renderer.cpp

+229-337
Large diffs are not rendered by default.

examples_tests/22.RaytracedAO/Renderer.h

+10-35
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
5252

5353
void takeAndSaveScreenShot(const std::filesystem::path& screenshotFilePath, const DenoiserArgs& denoiserArgs);
5454

55-
bool render(nbl::ITimer* timer);
55+
bool render(nbl::ITimer* timer, const bool beauty=true);
5656

5757
auto* getColorBuffer() { return m_colorBuffer; }
5858

@@ -65,11 +65,11 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
6565
uint64_t getTotalSamplesPerPixelComputed() const
6666
{
6767
const auto framesDispatched = static_cast<uint64_t>(m_framesDispatched);
68-
return framesDispatched*m_staticViewData.samplesPerPixelPerDispatch;
68+
return framesDispatched*getSamplesPerPixelPerDispatch();
6969
}
7070
uint64_t getTotalSamplesComputed() const
7171
{
72-
const auto samplesPerDispatch = static_cast<uint64_t>(m_staticViewData.samplesPerPixelPerDispatch*m_staticViewData.imageDimensions.x*m_staticViewData.imageDimensions.y);
72+
const auto samplesPerDispatch = static_cast<uint64_t>(getSamplesPerPixelPerDispatch()*m_staticViewData.imageDimensions.x*m_staticViewData.imageDimensions.y);
7373
const auto framesDispatched = static_cast<uint64_t>(m_framesDispatched);
7474
return framesDispatched*samplesPerDispatch;
7575
}
@@ -78,16 +78,11 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
7878
return m_totalRaysCast;
7979
}
8080

81-
//! Brief guideline to good path depth limits
82-
// Want to see stuff with indirect lighting on the other side of a pane of glass
83-
// 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light
84-
// Want to see through a glass box, vase, or office
85-
// 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light
86-
// pick higher numbers for better GI and less bias
87-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MaxPathDepth = 8u;
88-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t RandomDimsPerPathVertex = 3u;
89-
// one less because the first path vertex is rasterized
90-
_NBL_STATIC_INLINE_CONSTEXPR uint32_t MaxDimensions = RandomDimsPerPathVertex*(MaxPathDepth-1u);
81+
// The primary limiting factor is the precision of turning a fixed point grid sample to IEEE754 32bit float in the [0,1] range.
82+
// Mantissa is only 23 bits, and primary sample space low discrepancy sequence will start to produce duplicates
83+
// near 1.0 with exponent -1 after the sample count passes 2^24 elements.
84+
// Another limiting factor is our encoding of sample sequences, we only use 21bits per channel, so no duplicates till 2^21 samples.
85+
static inline constexpr uint32_t MaxSamples = 0x10000u;// 0x200000;
9186

9287
//
9388
static constexpr inline uint32_t AntiAliasingSequenceLength = 1024;
@@ -126,7 +121,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
126121
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers = 0u);
127122

128123
//
129-
bool Renderer::traceBounce(uint32_t & inoutRayCount);
124+
void preDispatch(const nbl::video::IGPUPipelineLayout* layout, nbl::video::IGPUDescriptorSet*const *const lastDS);
125+
bool traceBounce(uint32_t& inoutRayCount);
130126

131127
//
132128
const nbl::ext::MitsubaLoader::CMitsubaMetadata* m_globalMeta = nullptr;
@@ -141,11 +137,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
141137
nbl::scene::ISceneManager* m_smgr;
142138

143139
nbl::core::smart_refctd_ptr<nbl::ext::RadeonRays::Manager> m_rrManager;
144-
#ifdef _NBL_BUILD_OPTIX_
145-
nbl::core::smart_refctd_ptr<nbl::ext::OptiX::Manager> m_optixManager;
146-
CUstream m_cudaStream;
147-
nbl::core::smart_refctd_ptr<nbl::ext::OptiX::IContext> m_optixContext;
148-
#endif
149140

150141

151142
// persistent (initialized in constructor)
@@ -219,22 +210,6 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
219210
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> m_finalEnvmap;
220211

221212
std::future<bool> compileShadersFuture;
222-
223-
#ifdef _NBL_BUILD_OPTIX_
224-
nbl::core::smart_refctd_ptr<nbl::ext::OptiX::IDenoiser> m_denoiser;
225-
OptixDenoiserSizes m_denoiserMemReqs;
226-
nbl::cuda::CCUDAHandler::GraphicsAPIObjLink<nbl::video::IGPUBuffer> m_denoiserInputBuffer,m_denoiserStateBuffer,m_denoisedBuffer,m_denoiserScratchBuffer;
227-
228-
enum E_DENOISER_INPUT
229-
{
230-
EDI_COLOR,
231-
EDI_ALBEDO,
232-
EDI_NORMAL,
233-
EDI_COUNT
234-
};
235-
OptixImage2D m_denoiserOutput;
236-
OptixImage2D m_denoiserInputs[EDI_COUNT];
237-
#endif
238213
};
239214

240215
#endif

examples_tests/22.RaytracedAO/closestHit.comp

+11-7
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ void main()
3636
{
3737
// basic reads
3838
const uint vertex_depth = get_path_vertex_depth();
39-
const uint vertex_depth_mod_2 = vertex_depth&0x1u;
4039
const nbl_glsl_ext_RadeonRays_Intersection intersection = intersections[gl_GlobalInvocationID.x];
4140
const nbl_glsl_ext_RadeonRays_ray ray = sourceRays[gl_GlobalInvocationID.x];
4241

@@ -78,15 +77,19 @@ void main()
7877
const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing);
7978
contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material);
8079

81-
const bool _continue = vertex_depth!=MAX_PATH_DEPTH && ray.maxT==nbl_glsl_FLT_MAX; // last vertex or was a NEE path
80+
const uint pathDepth = bitfieldExtract(staticViewData.pathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch,0,8);
81+
const bool _continue = vertex_depth!=pathDepth && ray.maxT==nbl_glsl_FLT_MAX; // not last vertex and not NEE path
8282
if (_continue)
8383
{
8484
// if we ever support spatially varying emissive, we'll need to hoist barycentric computation and UV fetching to the position fetching
85-
const vec2 compactBary = vec2(1.f-intersection.uvwt.x-intersection.uvwt.y,intersection.uvwt.x); // radeon rays is a special boy
85+
const vec2 compactBary = vec2(1.f-intersection.uv.x-intersection.uv.y,intersection.uv.x); // radeon rays is a special boy and does its barycentrics weird
8686

87-
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = load_aux_vertex_attrs(
88-
batchInstanceData,indices,compactBary,geomDenormal,
89-
material,outPixelLocation,vertex_depth
87+
//
88+
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = nbl_glsl_xoroshiro64star_state_t(ray.mask,ray._active);
89+
90+
//
91+
normalizedN = load_normal_and_prefetch_textures(
92+
batchInstanceData,indices,compactBary,geomDenormal,material
9093
#ifdef TEX_PREFETCH_STREAM
9194
,mat2(0.0) // TODO: Covariance Rendering
9295
#endif
@@ -107,7 +110,8 @@ void main()
107110

108111
Contribution_normalizeAoV(contrib);
109112

110-
const uvec3 accumulationLocation = uvec3(outPixelLocation,sampleID%staticViewData.samplesPerPixelPerDispatch);
113+
const uint samplesPerPixelPerDispatch = bitfieldExtract(staticViewData.pathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch,16,16);
114+
const uvec3 accumulationLocation = uvec3(outPixelLocation,sampleID%samplesPerPixelPerDispatch);
111115
const vec3 acc_emissive = fetchAccumulation(accumulationLocation);
112116
const vec3 acc_albedo = fetchAlbedo(accumulationLocation);
113117
const vec3 acc_worldspaceNormal = fetchWorldspaceNormal(accumulationLocation);

examples_tests/22.RaytracedAO/common.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,18 @@
66
#define RAYCOUNT_N_BUFFERING_MASK (RAYCOUNT_N_BUFFERING-1)
77

88
#define MAX_TRIANGLES_IN_BATCH 16384
9-
#define MAX_ACCUMULATED_SAMPLES 0x10000
9+
10+
//! Brief guideline to good path depth limits
11+
// Want to see stuff with indirect lighting on the other side of a pane of glass
12+
// 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light
13+
// Want to see through a glass box, vase, or office
14+
// 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light
15+
// pick higher numbers for better GI and less bias
16+
#define MAX_PATH_DEPTH 35
17+
// need to bump to 2 in case of NEE + MIS, 3 in case of Path Guiding
18+
#define SAMPLING_STRATEGY_COUNT 1
19+
// One less because the first vertex is rasterized
20+
#define QUANTIZED_DIMENSIONS_PER_SAMPLE ((MAX_PATH_DEPTH-1)*SAMPLING_STRATEGY_COUNT)
1021

1122

1223
#define WORKGROUP_SIZE 256

examples_tests/22.RaytracedAO/main.cpp

+49-39
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,9 @@ using namespace core;
2323
class RaytracerExampleEventReceiver : public nbl::IEventReceiver
2424
{
2525
public:
26-
RaytracerExampleEventReceiver()
27-
: running(true)
28-
, skipKeyPressed(false)
29-
, resetViewKeyPressed(false)
30-
, nextKeyPressed(false)
31-
, previousKeyPressed(false)
32-
, screenshotKeyPressed(false)
26+
RaytracerExampleEventReceiver() : running(true), renderingBeauty(true)
3327
{
28+
resetKeys();
3429
}
3530

3631
bool OnEvent(const nbl::SEvent& event)
@@ -57,6 +52,9 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver
5752
case SkipKey:
5853
skipKeyPressed = true;
5954
break;
55+
case BeautyKey:
56+
renderingBeauty = !renderingBeauty;
57+
break;
6058
case QuitKey:
6159
running = false;
6260
return true;
@@ -82,6 +80,8 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver
8280

8381
inline bool isLogProgressKeyPressed() const { return logProgressKeyPressed; }
8482

83+
inline bool isRenderingBeauty() const { return renderingBeauty; }
84+
8585
inline void resetKeys()
8686
{
8787
skipKeyPressed = false;
@@ -100,15 +100,17 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver
100100
static constexpr nbl::EKEY_CODE PreviousKey = nbl::KEY_NEXT; // PAGE_DOWN
101101
static constexpr nbl::EKEY_CODE ScreenshotKey = nbl::KEY_KEY_P;
102102
static constexpr nbl::EKEY_CODE LogProgressKey = nbl::KEY_KEY_L;
103+
static constexpr nbl::EKEY_CODE BeautyKey = nbl::KEY_KEY_B;
103104

104-
bool running = false;
105-
bool skipKeyPressed = false;
106-
bool resetViewKeyPressed = false;
107-
bool nextKeyPressed = false;
108-
bool previousKeyPressed = false;
109-
bool screenshotKeyPressed = false;
110-
bool logProgressKeyPressed = false;
105+
bool running;
106+
bool renderingBeauty;
111107

108+
bool skipKeyPressed;
109+
bool resetViewKeyPressed;
110+
bool nextKeyPressed;
111+
bool previousKeyPressed;
112+
bool screenshotKeyPressed;
113+
bool logProgressKeyPressed;
112114
};
113115

114116
int main(int argc, char** argv)
@@ -556,40 +558,47 @@ int main(int argc, char** argv)
556558
auto driver = device->getVideoDriver();
557559

558560
core::smart_refctd_ptr<Renderer> renderer = core::make_smart_refctd_ptr<Renderer>(driver,device->getAssetManager(),smgr);
559-
constexpr uint32_t MaxSamples = MAX_ACCUMULATED_SAMPLES;
560-
auto sampleSequence = core::make_smart_refctd_ptr<asset::ICPUBuffer>(sizeof(uint32_t)*MaxSamples*Renderer::MaxDimensions);
561+
auto sampleSequence = core::make_smart_refctd_ptr<asset::ICPUBuffer>(sizeof(uint64_t)*Renderer::MaxSamples*QUANTIZED_DIMENSIONS_PER_SAMPLE);
561562
{
562563
bool generateNewSamples = true;
563564

564565
io::IReadFile* cacheFile = device->getFileSystem()->createAndOpenFile("../../tmp/rtSamples.bin");
565566
if (cacheFile)
566567
{
567-
if (cacheFile->getSize()>=sampleSequence->getSize()) // light validation
568+
if (cacheFile->getSize()==sampleSequence->getSize()) // light validation
568569
{
569570
cacheFile->read(sampleSequence->getPointer(),sampleSequence->getSize());
570-
generateNewSamples = false;
571+
//generateNewSamples = false;
571572
}
572573
cacheFile->drop();
573574
}
574575

575576
if (generateNewSamples)
576577
{
577-
/** TODO: move into the renderer and redo the sampling (compress into R21G21B21_UINT)
578-
Locality Level 0: the 3 dimensions consumed for a BxDF or NEE sample
579-
Locality Level 1: the k = 3 (1 + NEE) samples which will be consumed in the same invocation
580-
Locality Level 2-COMP: the N = k dispatchSPP Resolution samples consumed by a raygen dispatch (another TODO: would be order CS and everything in a morton curve)
581-
Locality Level 2-RTX: the N = k Depth samples consumed as we recurse deeper
582-
Locality Level 3: the D = k dispatchSPP Resolution Depth samples consumed as we accumuate more samples
583-
**/
584-
constexpr uint32_t Channels = 3u;
585-
static_assert(Renderer::MaxDimensions%Channels==0u,"We cannot have this!");
586-
core::OwenSampler sampler(Renderer::MaxDimensions,0xdeadbeefu);
587-
588-
uint32_t (&out)[][Channels] = *reinterpret_cast<uint32_t(*)[][Channels]>(sampleSequence->getPointer());
589-
for (auto realdim=0u; realdim<Renderer::MaxDimensions/Channels; realdim++)
590-
for (auto c=0u; c<Channels; c++)
591-
for (uint32_t i=0; i<MaxSamples; i++)
592-
out[realdim*MaxSamples+i][c] = sampler.sample(realdim*Channels+c,i);
578+
constexpr auto DimensionsPerQuanta = 3u;
579+
core::OwenSampler sampler(QUANTIZED_DIMENSIONS_PER_SAMPLE*DimensionsPerQuanta,0xdeadbeefu);
580+
581+
// Memory Order: 3 Dimensions, then multiple of sampling strategies per vertex, then depth, then sample ID
582+
uint32_t(&pout)[][2] = *reinterpret_cast<uint32_t(*)[][2]>(sampleSequence->getPointer());
583+
// the horrible order of iteration over output memory is caused by the fact that certain samplers like the
584+
// Owen Scramble sampler, have a large cache which needs to be generated separately for each dimension.
585+
for (auto metadim=0u; metadim<QUANTIZED_DIMENSIONS_PER_SAMPLE; metadim++)
586+
{
587+
const auto trudim = metadim*DimensionsPerQuanta;
588+
for (uint32_t i=0; i<Renderer::MaxSamples; i++)
589+
pout[i*QUANTIZED_DIMENSIONS_PER_SAMPLE+metadim][0] = sampler.sample(trudim+0u,i);
590+
for (uint32_t i=0; i<Renderer::MaxSamples; i++)
591+
pout[i*QUANTIZED_DIMENSIONS_PER_SAMPLE+metadim][1] = sampler.sample(trudim+1u,i);
592+
for (uint32_t i=0; i<Renderer::MaxSamples; i++)
593+
{
594+
const auto sample = sampler.sample(trudim+2u,i);
595+
const auto out = pout[i*QUANTIZED_DIMENSIONS_PER_SAMPLE+metadim];
596+
out[0] &= 0xFFFFF800u;
597+
out[0] |= sample>>21;
598+
out[1] &= 0xFFFFF800u;
599+
out[1] |= (sample>>10)&0x07FFu;
600+
}
601+
}
593602

594603
io::IWriteFile* cacheFile = device->getFileSystem()->createAndWriteFile("../../tmp/rtSamples.bin");
595604
if (cacheFile)
@@ -659,7 +668,7 @@ int main(int argc, char** argv)
659668
prevHeight = sensorData.height;
660669

661670
renderer->resetSampleAndFrameCounters(); // so that renderer->getTotalSamplesPerPixelComputed is 0 at the very beginning
662-
if(needsReinit)
671+
if(needsReinit)
663672
{
664673
renderer->deinitScreenSizedResources();
665674
renderer->initScreenSizedResources(sensorData.width, sensorData.height, std::move(sampleSequence));
@@ -817,8 +826,7 @@ int main(int argc, char** argv)
817826
}
818827

819828
driver->beginScene(false, false);
820-
821-
if(!renderer->render(device->getTimer()))
829+
if(!renderer->render(device->getTimer(),receiver.isRenderingBeauty()))
822830
{
823831
renderFailed = true;
824832
driver->endScene();
@@ -838,8 +846,10 @@ int main(int argc, char** argv)
838846
std::wostringstream str;
839847
auto samples = renderer->getTotalSamplesComputed();
840848
auto rays = renderer->getTotalRaysCast();
841-
str << L"Raytraced Shadows Demo - Nabla Engine MegaSamples: " << samples/1000000ull << " MRay/s: "
842-
<< double(rays)/double(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now()-start).count());
849+
const double microsecondsElapsed = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now()-start).count();
850+
str << L"Raytraced Shadows Demo - Nabla Engine MegaSamples: " << samples/1000000ull
851+
<< " MSample/s: " << double(samples)/microsecondsElapsed
852+
<< " MRay/s: " << double(rays)/microsecondsElapsed;
843853

844854
device->setWindowCaption(str.str());
845855
lastFPSTime = time;

examples_tests/22.RaytracedAO/present2D.frag

-17
This file was deleted.

0 commit comments

Comments
 (0)