Skip to content

Commit 34db10a

Browse files
gw3583wrupdater
authored and
wrupdater
committed
Bug 1531248 - Reduce the number of resolve / copy steps in WR on mobile devices. r=kvark
This patch fixes some wasted GPU time on mobile devices due to redundant resolve / copy steps.

In the first case, we would previously do:
- Global clear of color / depth on main framebuffer.
- Bind and draw off-screen targets.
- Bind main framebuffer and draw scene.

Between steps 1 and 2, a resolve step is triggered on tiled GPU drivers, wasting a lot of GPU time. To fix this, the clear is now deferred until the framebuffer of the first document is drawn. This does slightly change the semantics of how WR does clear operations, but I think it works fine and makes more sense.

In the second case, we would previously do:
- ...
- Draw main framebuffer.
- End frame and invalidate the contents of input textures.
- Bind main framebuffer and draw debug overlay.

This also introduces an extra resolve / copy step, even if the debug overlay is not enabled. To fix this, the invalidation step of the input textures to the main framebuffer pass is deferred until all drawing is complete on the main framebuffer, by doing the invalidation in the end_frame() call of the texture resolver.

Together, these save a very significant amount of GPU time (in ms per frame) on the mobile devices I tested.

Differential Revision: https://phabricator.services.mozilla.com/D21490

[wrupdater] From https://hg.mozilla.org/mozilla-central/rev/a901d60873b90ab874bfb0a741ca128c4e44a667
1 parent 0508110 commit 34db10a

File tree

1 file changed

+81
-58
lines changed

1 file changed

+81
-58
lines changed

webrender/src/renderer.rs

+81-58
Original file line numberDiff line numberDiff line change
@@ -2541,6 +2541,14 @@ impl Renderer {
25412541
let mut frame_profiles = Vec::new();
25422542
let mut profile_timers = RendererProfileTimers::new();
25432543

2544+
// The texture resolver scope should be outside of any rendering, including
2545+
// debug rendering. This ensures that when we return render targets to the
2546+
// pool via glInvalidateFramebuffer, we don't do any debug rendering after
2547+
// that point. Otherwise, the bind / invalidate / bind logic trips up the
2548+
// render pass logic in tiled / mobile GPUs, resulting in an extra copy /
2549+
// resolve step when the debug overlay is enabled.
2550+
self.texture_resolver.begin_frame();
2551+
25442552
let profile_samplers = {
25452553
let _gm = self.gpu_profile.start_marker("build samples");
25462554
// Block CPU waiting for last frame's GPU profiles to arrive.
@@ -2575,58 +2583,62 @@ impl Renderer {
25752583
});
25762584

25772585
profile_timers.cpu_time.profile(|| {
2578-
let clear_depth_value = if self.are_documents_intersecting_depth() {
2579-
None
2580-
} else {
2581-
Some(1.0)
2582-
};
2586+
// If the documents don't intersect for depth, we can just do
2587+
// a single, global depth clear.
2588+
let clear_depth_per_doc = self.are_documents_intersecting_depth();
25832589

25842590
//Note: another borrowck dance
25852591
let mut active_documents = mem::replace(&mut self.active_documents, Vec::default());
25862592
// sort by the document layer id
25872593
active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer);
25882594

2589-
// don't clear the framebuffer if one of the rendered documents will overwrite it
2590-
if let Some(framebuffer_size) = framebuffer_size {
2591-
let needs_color_clear = !active_documents
2592-
.iter()
2593-
.any(|&(_, RenderedDocument { ref frame, .. })| {
2594-
frame.background_color.is_some() &&
2595-
frame.inner_rect.origin == DeviceIntPoint::zero() &&
2596-
frame.inner_rect.size == framebuffer_size
2597-
});
2598-
2599-
if needs_color_clear || clear_depth_value.is_some() {
2600-
let clear_color = if needs_color_clear {
2601-
self.clear_color.map(|color| color.to_array())
2602-
} else {
2603-
None
2604-
};
2605-
self.device.reset_draw_target();
2606-
self.device.enable_depth_write();
2607-
self.device.clear_target(clear_color, clear_depth_value, None);
2608-
self.device.disable_depth_write();
2609-
}
2610-
}
2611-
26122595
#[cfg(feature = "replay")]
26132596
self.texture_resolver.external_images.extend(
26142597
self.owned_external_images.iter().map(|(key, value)| (*key, value.clone()))
26152598
);
26162599

2617-
for &mut (_, RenderedDocument { ref mut frame, .. }) in &mut active_documents {
2600+
for (doc_index, (_, RenderedDocument { ref mut frame, .. })) in active_documents.iter_mut().enumerate() {
26182601
frame.profile_counters.reset_targets();
26192602
self.prepare_gpu_cache(frame);
26202603
assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id,
26212604
"Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})",
26222605
frame.gpu_cache_frame_id, self.gpu_cache_frame_id);
26232606

2607+
// Work out what color to clear the frame buffer for this document.
2608+
// The document's supplied clear color is used, unless:
2609+
// (a) The renderer has an overall clear color specified AND
2610+
// (b) We are rendering the first document.
2611+
// If both those conditions are true, the overall renderer
2612+
// clear color is used instead of the document's clear color.
2613+
2614+
// Get the default clear color from the renderer.
2615+
let mut fb_clear_color = if doc_index == 0 {
2616+
self.clear_color
2617+
} else {
2618+
None
2619+
};
2620+
2621+
// Override with document clear color if no overall clear
2622+
// color or not on the first document.
2623+
if fb_clear_color.is_none() {
2624+
fb_clear_color = frame.background_color;
2625+
}
2626+
2627+
// Only clear the depth buffer for this document if this is
2628+
// the first document, or we need to clear depth per document.
2629+
let fb_clear_depth = if clear_depth_per_doc || doc_index == 0 {
2630+
Some(1.0)
2631+
} else {
2632+
None
2633+
};
2634+
26242635
self.draw_tile_frame(
26252636
frame,
26262637
framebuffer_size,
2627-
clear_depth_value.is_some(),
26282638
cpu_frame_id,
2629-
&mut results.stats
2639+
&mut results.stats,
2640+
fb_clear_color,
2641+
fb_clear_depth,
26302642
);
26312643

26322644
if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
@@ -2733,6 +2745,12 @@ impl Renderer {
27332745
if let Some(debug_renderer) = self.debug.try_get_mut() {
27342746
debug_renderer.render(&mut self.device, framebuffer_size);
27352747
}
2748+
// See comment for texture_resolver.begin_frame() for explanation
2749+
// of why this must be done after all rendering, including debug
2750+
// overlays. The end_frame() call implicitly calls end_pass(), which
2751+
// should ensure any left over render targets get invalidated and
2752+
// returned to the pool correctly.
2753+
self.texture_resolver.end_frame(&mut self.device, cpu_frame_id);
27362754
self.device.end_frame();
27372755
});
27382756
if framebuffer_size.is_some() {
@@ -3107,8 +3125,8 @@ impl Renderer {
31073125
draw_target: DrawTarget,
31083126
target: &ColorRenderTarget,
31093127
framebuffer_target_rect: DeviceIntRect,
3110-
depth_is_ready: bool,
31113128
clear_color: Option<[f32; 4]>,
3129+
clear_depth: Option<f32>,
31123130
render_tasks: &RenderTaskTree,
31133131
projection: &Transform3D<f32>,
31143132
frame_id: GpuFrameId,
@@ -3134,12 +3152,9 @@ impl Renderer {
31343152
self.device.disable_depth();
31353153
self.set_blend(false, framebuffer_kind);
31363154

3137-
let depth_clear = if !depth_is_ready && target.needs_depth() {
3155+
if clear_depth.is_some() {
31383156
self.device.enable_depth_write();
3139-
Some(1.0)
3140-
} else {
3141-
None
3142-
};
3157+
}
31433158

31443159
let clear_rect = if !draw_target.is_default() {
31453160
if self.enable_clear_scissor {
@@ -3169,9 +3184,13 @@ impl Renderer {
31693184
Some(rect)
31703185
};
31713186

3172-
self.device.clear_target(clear_color, depth_clear, clear_rect);
3187+
self.device.clear_target(
3188+
clear_color,
3189+
clear_depth,
3190+
clear_rect,
3191+
);
31733192

3174-
if depth_clear.is_some() {
3193+
if clear_depth.is_some() {
31753194
self.device.disable_depth_write();
31763195
}
31773196
}
@@ -4042,9 +4061,10 @@ impl Renderer {
40424061
&mut self,
40434062
frame: &mut Frame,
40444063
framebuffer_size: Option<DeviceIntSize>,
4045-
framebuffer_depth_is_ready: bool,
40464064
frame_id: GpuFrameId,
40474065
stats: &mut RendererStats,
4066+
fb_clear_color: Option<ColorF>,
4067+
fb_clear_depth: Option<f32>,
40484068
) {
40494069
let _gm = self.gpu_profile.start_marker("tile frame draw");
40504070

@@ -4058,7 +4078,6 @@ impl Renderer {
40584078
self.device.disable_stencil();
40594079

40604080
self.bind_frame_data(frame);
4061-
self.texture_resolver.begin_frame();
40624081

40634082
for (pass_index, pass) in frame.passes.iter_mut().enumerate() {
40644083
let _gm = self.gpu_profile.start_marker(&format!("pass {}", pass_index));
@@ -4074,12 +4093,11 @@ impl Renderer {
40744093
&mut self.device,
40754094
);
40764095

4077-
let (cur_alpha, cur_color) = match pass.kind {
4096+
match pass.kind {
40784097
RenderPassKind::MainFramebuffer(ref target) => {
40794098
if let Some(framebuffer_size) = framebuffer_size {
40804099
stats.color_target_count += 1;
40814100

4082-
let clear_color = frame.background_color.map(|color| color.to_array());
40834101
let projection = Transform3D::ortho(
40844102
0.0,
40854103
framebuffer_size.width as f32,
@@ -4093,16 +4111,14 @@ impl Renderer {
40934111
DrawTarget::Default(framebuffer_size),
40944112
target,
40954113
frame.inner_rect,
4096-
framebuffer_depth_is_ready,
4097-
clear_color,
4114+
fb_clear_color.map(|color| color.to_array()),
4115+
fb_clear_depth,
40984116
&frame.render_tasks,
40994117
&projection,
41004118
frame_id,
41014119
stats,
41024120
);
41034121
}
4104-
4105-
(None, None)
41064122
}
41074123
RenderPassKind::OffScreen { ref mut alpha, ref mut color, ref mut texture_cache } => {
41084124
let alpha_tex = self.allocate_target_texture(alpha, &mut frame.profile_counters);
@@ -4166,32 +4182,39 @@ impl Renderer {
41664182
ORTHO_FAR_PLANE,
41674183
);
41684184

4185+
let clear_depth = if target.needs_depth() {
4186+
Some(1.0)
4187+
} else {
4188+
None
4189+
};
4190+
41694191
self.draw_color_target(
41704192
draw_target,
41714193
target,
41724194
frame.inner_rect,
4173-
false,
41744195
Some([0.0, 0.0, 0.0, 0.0]),
4196+
clear_depth,
41754197
&frame.render_tasks,
41764198
&projection,
41774199
frame_id,
41784200
stats,
41794201
);
41804202
}
41814203

4182-
(alpha_tex, color_tex)
4204+
// Only end the pass here and invalidate previous textures for
4205+
// off-screen targets. Deferring return of the inputs to the
4206+
// frame buffer until the implicit end_pass in end_frame allows
4207+
// debug draw overlays to be added without triggering a copy
4208+
// resolve stage in mobile / tiled GPUs.
4209+
self.texture_resolver.end_pass(
4210+
&mut self.device,
4211+
alpha_tex,
4212+
color_tex,
4213+
);
41834214
}
4184-
};
4185-
4186-
self.texture_resolver.end_pass(
4187-
&mut self.device,
4188-
cur_alpha,
4189-
cur_color,
4190-
);
4215+
}
41914216
}
41924217

4193-
self.texture_resolver.end_frame(&mut self.device, frame_id);
4194-
41954218
if let Some(framebuffer_size) = framebuffer_size {
41964219
self.draw_frame_debug_items(&frame.debug_items);
41974220
self.draw_render_target_debug(framebuffer_size);

0 commit comments

Comments
 (0)