Skip to content

Commit b7fb9f8

Browse files
authored
Merge pull request GafferHQ#4213 from johnhaddon/parallelProcessLocationsImprovements
SceneAlgo/FilterResults improvements
2 parents 36742bf + 438250b commit b7fb9f8

File tree

6 files changed

+134
-195
lines changed

6 files changed

+134
-195
lines changed

Changes.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ Improvements
2626
- Context : Optimized `hash()` method, and reduced overhead in `EditableScope`.
2727
- NameSwitch/Spreadsheet : Rows with an empty name are now treated as if they were disabled. See Breaking Changes for further details.
2828
- ContextVariables : Improved performance by around 50%.
29+
- FilterResults : Improved performance.
30+
- SceneAlgo : Reduced threading overhead for `parallelProcessLocations()`, `parallelTraverse()` and `filteredParallelTraverse()`. This is particularly noticeable when visiting locations with many children.
2931

3032
Fixes
3133
-----
@@ -53,6 +55,7 @@ API
5355
- Added `setHoverPositionVisible()` and `getHoverPositionVisible()` accessors to control an optional position indicator drawn under the pointer.
5456
- Expression : Added `Engine::executeCachePolicy()` method which must be implemented by subclasses.
5557
- ImageAlgo : Added constants for the default channel names - `channelNameR` etc.
58+
- SceneAlgo : Added optional `root` argument to `filteredParallelTraverse( scene, pathMatcher )`.
5659

5760
Breaking Changes
5861
----------------
@@ -71,7 +74,9 @@ Breaking Changes
7174
- Removed `setPositionIncrement()/getPositionIncrement()` from `Slider`. Use `setIncrement()/getIncrement()` instead.
7275
- Replaced `_drawPosition()` method with `_drawValue()`.
7376
- StandardOptions : Removed `cameraBlur` plug. This never functioned as advertised, as the regular `transformBlur` and `deformationBlur` blur settings were applied to cameras instead. As before, a StandardAttributes node may be used to customise blur for individual cameras.
74-
- SceneAlgo : Changed signature of the following methods to use `GafferScene::FilterPlug` : `matchingPaths`, `filteredParallelTraverse`, `Detail::ThreadableFilteredFunctor`.
77+
- SceneAlgo :
78+
- Changed signature of the following methods to use `GafferScene::FilterPlug` : `matchingPaths`, `filteredParallelTraverse`, `Detail::ThreadableFilteredFunctor`.
79+
- Removed `filteredParallelTraverse()` overload which accepted a `Filter *`. Pass `filter->outPlug()` instead.
7580
- DeleteFaces / DeletePoints / DeleteCurves : The PrimitiveVariable name is now taken verbatim, rather than stripping whitespace.
7681
- Serialisation :
7782
- Disabled copy construction.

include/GafferScene/SceneAlgo.h

+8-24
Original file line numberDiff line numberDiff line change
@@ -121,36 +121,20 @@ GAFFERSCENE_API IECore::MurmurHash matchingPathsHash( const IECore::PathMatcher
121121
/// };
122122
/// ```
123123
template <class ThreadableFunctor>
124-
void parallelProcessLocations( const GafferScene::ScenePlug *scene, ThreadableFunctor &f );
125-
/// As above, but starting the traversal at the specified root.
126-
/// \todo Add default value for `root` and remove overload above.
127-
template <class ThreadableFunctor>
128-
void parallelProcessLocations( const GafferScene::ScenePlug *scene, ThreadableFunctor &f, const ScenePlug::ScenePath &root );
124+
void parallelProcessLocations( const GafferScene::ScenePlug *scene, ThreadableFunctor &f, const ScenePlug::ScenePath &root = ScenePlug::ScenePath() );
129125

130-
/// Calls a functor on all paths in the scene
131-
/// The functor must take ( const ScenePlug*, const ScenePlug::ScenePath& ), and can return false to prune traversal
132-
template <class ThreadableFunctor>
133-
void parallelTraverse( const ScenePlug *scene, ThreadableFunctor &f );
134-
/// As above, but starting the traversal at `root`.
135-
/// \todo Add default value for `root` and remove overload above.
126+
/// Calls a functor on all locations in the scene. This differs from `parallelProcessLocations()` in that a single instance
127+
/// of the functor is used for all locations.
128+
/// The functor must take `( const ScenePlug *, const ScenePlug::ScenePath & )`, and can return false to prune traversal.
136129
template <class ThreadableFunctor>
137-
void parallelTraverse( const ScenePlug *scene, ThreadableFunctor &f, const ScenePlug::ScenePath &root );
130+
void parallelTraverse( const ScenePlug *scene, ThreadableFunctor &f, const ScenePlug::ScenePath &root = ScenePlug::ScenePath() );
138131

139-
/// Calls a functor on all paths in the scene that are matched by the filter.
140-
/// The functor must take ( const ScenePlug*, const ScenePlug::ScenePath& ), and can return false to prune traversal
141-
template <class ThreadableFunctor>
142-
void filteredParallelTraverse( const ScenePlug *scene, const GafferScene::Filter *filter, ThreadableFunctor &f );
143-
/// As above, but specifying the filter as a plug - typically Filter::outPlug() or
144-
/// FilteredSceneProcessor::filterPlug() would be passed.
145-
template <class ThreadableFunctor>
146-
void filteredParallelTraverse( const ScenePlug *scene, const FilterPlug *filterPlug, ThreadableFunctor &f );
147-
/// As above, but starting the traversal at `root`.
148-
/// \todo Add default value for `root` and remove overload above.
132+
/// As for `parallelTraverse()`, but only calling the functor for locations matched by the filter.
149133
template <class ThreadableFunctor>
150-
void filteredParallelTraverse( const ScenePlug *scene, const FilterPlug *filterPlug, ThreadableFunctor &f, const ScenePlug::ScenePath &root );
134+
void filteredParallelTraverse( const ScenePlug *scene, const FilterPlug *filterPlug, ThreadableFunctor &f, const ScenePlug::ScenePath &root = ScenePlug::ScenePath() );
151135
/// As above, but using a PathMatcher as a filter.
152136
template <class ThreadableFunctor>
153-
void filteredParallelTraverse( const ScenePlug *scene, const IECore::PathMatcher &filter, ThreadableFunctor &f );
137+
void filteredParallelTraverse( const ScenePlug *scene, const IECore::PathMatcher &filter, ThreadableFunctor &f, const ScenePlug::ScenePath &root = ScenePlug::ScenePath() );
154138

155139
/// Globals
156140
/// =======

include/GafferScene/SceneAlgo.inl

+43-161
Original file line numberDiff line numberDiff line change
@@ -36,149 +36,55 @@
3636

3737
#include "Gaffer/Context.h"
3838

39-
#include "tbb/task.h"
39+
#include "tbb/parallel_for.h"
4040

4141
namespace GafferScene
4242
{
4343

4444
namespace Detail
4545
{
4646

47-
template <class ThreadableFunctor>
48-
class TraverseTask : public tbb::task
49-
{
50-
51-
public :
52-
53-
TraverseTask(
54-
const GafferScene::ScenePlug *scene,
55-
const Gaffer::ThreadState &threadState,
56-
ThreadableFunctor &f
57-
)
58-
: m_scene( scene ), m_threadState( threadState ), m_f( f )
59-
{
60-
}
61-
62-
TraverseTask(
63-
const GafferScene::ScenePlug *scene,
64-
const Gaffer::ThreadState &threadState,
65-
const ScenePlug::ScenePath &path,
66-
ThreadableFunctor &f
67-
)
68-
: m_scene( scene ), m_threadState( threadState ), m_f( f ), m_path( path )
69-
{
70-
}
71-
72-
73-
~TraverseTask() override
74-
{
75-
}
76-
77-
task *execute() override
78-
{
79-
ScenePlug::PathScope pathScope( m_threadState, &m_path );
80-
81-
if( m_f( m_scene, m_path ) )
82-
{
83-
IECore::ConstInternedStringVectorDataPtr childNamesData = m_scene->childNamesPlug()->getValue();
84-
const std::vector<IECore::InternedString> &childNames = childNamesData->readable();
85-
86-
set_ref_count( 1 + childNames.size() );
87-
88-
ScenePlug::ScenePath childPath = m_path;
89-
childPath.push_back( IECore::InternedString() ); // space for the child name
90-
for( std::vector<IECore::InternedString>::const_iterator it = childNames.begin(), eIt = childNames.end(); it != eIt; it++ )
91-
{
92-
childPath[m_path.size()] = *it;
93-
TraverseTask *t = new( allocate_child() ) TraverseTask( *this, childPath );
94-
spawn( *t );
95-
}
96-
wait_for_all();
97-
}
98-
99-
return nullptr;
100-
}
101-
102-
protected :
103-
104-
TraverseTask( const TraverseTask &other, const ScenePlug::ScenePath &path )
105-
: m_scene( other.m_scene ),
106-
m_threadState( other.m_threadState ),
107-
m_f( other.m_f ),
108-
m_path( path )
109-
{
110-
}
111-
112-
private :
113-
114-
const GafferScene::ScenePlug *m_scene;
115-
const Gaffer::ThreadState &m_threadState;
116-
ThreadableFunctor &m_f;
117-
GafferScene::ScenePlug::ScenePath m_path;
118-
119-
};
120-
12147
template<typename ThreadableFunctor>
122-
class LocationTask : public tbb::task
48+
void parallelProcessLocationsWalk( const GafferScene::ScenePlug *scene, const Gaffer::ThreadState &threadState, const ScenePlug::ScenePath &path, ThreadableFunctor &f, tbb::task_group_context &taskGroupContext )
12349
{
50+
ScenePlug::PathScope pathScope( threadState, &path );
12451

125-
public :
52+
if( !f( scene, path ) )
53+
{
54+
return;
55+
}
12656

127-
LocationTask(
128-
const GafferScene::ScenePlug *scene,
129-
const Gaffer::ThreadState &threadState,
130-
const ScenePlug::ScenePath &path,
131-
ThreadableFunctor &f
132-
)
133-
: m_scene( scene ), m_threadState( threadState ), m_path( path ), m_f( f )
134-
{
135-
}
57+
IECore::ConstInternedStringVectorDataPtr childNamesData = scene->childNamesPlug()->getValue();
58+
const std::vector<IECore::InternedString> &childNames = childNamesData->readable();
59+
if( childNames.empty() )
60+
{
61+
return;
62+
}
13663

137-
~LocationTask() override
138-
{
139-
}
64+
using ChildNameRange = tbb::blocked_range<std::vector<IECore::InternedString>::const_iterator>;
65+
const ChildNameRange loopRange( childNames.begin(), childNames.end() );
14066

141-
task *execute() override
67+
auto loopBody = [&] ( const ChildNameRange &range ) {
68+
ScenePlug::ScenePath childPath = path;
69+
childPath.push_back( IECore::InternedString() ); // Space for the child name
70+
for( auto &childName : range )
14271
{
143-
ScenePlug::PathScope pathScope( m_threadState, &m_path );
144-
145-
if( !m_f( m_scene, m_path ) )
146-
{
147-
return nullptr;
148-
}
149-
150-
IECore::ConstInternedStringVectorDataPtr childNamesData = m_scene->childNamesPlug()->getValue();
151-
const std::vector<IECore::InternedString> &childNames = childNamesData->readable();
152-
if( childNames.empty() )
153-
{
154-
return nullptr;
155-
}
156-
157-
std::vector<ThreadableFunctor> childFunctors( childNames.size(), m_f );
158-
159-
set_ref_count( 1 + childNames.size() );
160-
161-
ScenePlug::ScenePath childPath = m_path;
162-
childPath.push_back( IECore::InternedString() ); // space for the child name
163-
for( size_t i = 0, e = childNames.size(); i < e; ++i )
164-
{
165-
childPath.back() = childNames[i];
166-
LocationTask *t = new( allocate_child() ) LocationTask( m_scene, m_threadState, childPath, childFunctors[i] );
167-
spawn( *t );
168-
}
169-
wait_for_all();
170-
171-
return nullptr;
72+
ThreadableFunctor childFunctor( f );
73+
childPath.back() = childName;
74+
parallelProcessLocationsWalk( scene, threadState, childPath, childFunctor, taskGroupContext );
17275
}
76+
};
17377

174-
private :
175-
176-
const GafferScene::ScenePlug *m_scene;
177-
const Gaffer::ThreadState &m_threadState;
178-
const GafferScene::ScenePlug::ScenePath m_path;
179-
ThreadableFunctor &m_f;
180-
181-
};
78+
if( childNames.size() > 1 )
79+
{
80+
tbb::parallel_for( loopRange, loopBody, taskGroupContext );
81+
}
82+
else
83+
{
84+
// Serial execution
85+
loopBody( loopRange );
86+
}
87+
}
18288

18389
template <class ThreadableFunctor>
18490
struct ThreadableFilteredFunctor
@@ -240,47 +146,23 @@ struct PathMatcherFunctor
240146
namespace SceneAlgo
241147
{
242148

243-
template <class ThreadableFunctor>
244-
void parallelProcessLocations( const GafferScene::ScenePlug *scene, ThreadableFunctor &f )
245-
{
246-
return parallelProcessLocations( scene, f, ScenePlug::ScenePath() );
247-
}
248-
249149
template <class ThreadableFunctor>
250150
void parallelProcessLocations( const GafferScene::ScenePlug *scene, ThreadableFunctor &f, const ScenePlug::ScenePath &root )
251151
{
252152
tbb::task_group_context taskGroupContext( tbb::task_group_context::isolated ); // Prevents outer tasks silently cancelling our tasks
253-
Detail::LocationTask<ThreadableFunctor> *task = new( tbb::task::allocate_root( taskGroupContext ) ) Detail::LocationTask<ThreadableFunctor>( scene, Gaffer::ThreadState::current(), root, f );
254-
tbb::task::spawn_root_and_wait( *task );
255-
}
256-
257-
template <class ThreadableFunctor>
258-
void parallelTraverse( const GafferScene::ScenePlug *scene, ThreadableFunctor &f )
259-
{
260-
tbb::task_group_context taskGroupContext( tbb::task_group_context::isolated ); // Prevents outer tasks silently cancelling our tasks
261-
Detail::TraverseTask<ThreadableFunctor> *task = new( tbb::task::allocate_root( taskGroupContext ) ) Detail::TraverseTask<ThreadableFunctor>( scene, Gaffer::ThreadState::current(), f );
262-
tbb::task::spawn_root_and_wait( *task );
153+
Detail::parallelProcessLocationsWalk( scene, Gaffer::ThreadState::current(), root, f, taskGroupContext );
263154
}
264155

265156
template <class ThreadableFunctor>
266157
void parallelTraverse( const ScenePlug *scene, ThreadableFunctor &f, const ScenePlug::ScenePath &root )
267158
{
268-
tbb::task_group_context taskGroupContext( tbb::task_group_context::isolated ); // Prevents outer tasks silently cancelling our tasks
269-
Detail::TraverseTask<ThreadableFunctor> *task = new( tbb::task::allocate_root( taskGroupContext ) ) Detail::TraverseTask<ThreadableFunctor>( scene, Gaffer::ThreadState::current(), root, f );
270-
tbb::task::spawn_root_and_wait( *task );
271-
}
272-
273-
template <class ThreadableFunctor>
274-
void filteredParallelTraverse( const GafferScene::ScenePlug *scene, const GafferScene::Filter *filter, ThreadableFunctor &f )
275-
{
276-
filteredParallelTraverse( scene, filter->outPlug(), f );
277-
}
278-
279-
template <class ThreadableFunctor>
280-
void filteredParallelTraverse( const GafferScene::ScenePlug *scene, const GafferScene::FilterPlug *filterPlug, ThreadableFunctor &f )
281-
{
282-
Detail::ThreadableFilteredFunctor<ThreadableFunctor> ff( f, filterPlug );
283-
parallelTraverse( scene, ff );
159+
// `parallelProcessLocations()` takes a copy of the functor at each location, whereas
160+
// `parallelTraverse()` is intended to use the same functor for all locations. Wrap the
161+
// functor in a cheap-to-copy lambda, so that the functor itself won't be copied.
162+
auto reference = [&f] ( const ScenePlug *scene, const ScenePlug::ScenePath &path ) {
163+
return f( scene, path );
164+
};
165+
parallelProcessLocations( scene, reference, root );
284166
}
285167

286168
template <class ThreadableFunctor>
@@ -291,10 +173,10 @@ void filteredParallelTraverse( const ScenePlug *scene, const GafferScene::Filter
291173
}
292174

293175
template <class ThreadableFunctor>
294-
void filteredParallelTraverse( const ScenePlug *scene, const IECore::PathMatcher &filter, ThreadableFunctor &f )
176+
void filteredParallelTraverse( const ScenePlug *scene, const IECore::PathMatcher &filter, ThreadableFunctor &f, const ScenePlug::ScenePath &root )
295177
{
296178
Detail::PathMatcherFunctor<ThreadableFunctor> ff( f, filter );
297-
parallelTraverse( scene, ff );
179+
parallelTraverse( scene, ff, root );
298180
}
299181

300182
} // namespace SceneAlgo

python/GafferSceneTest/FilterResultsTest.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def testRootMatchVsNoMatch( self ) :
255255

256256
@unittest.skipIf( GafferTest.inCI(), "Performance not relevant on CI platform" )
257257
@GafferTest.TestRunner.PerformanceTestMethod()
258-
def testHashPerf( self ):
258+
def testHashPerformance( self ):
259259

260260
sphere = GafferScene.Sphere()
261261

@@ -277,5 +277,29 @@ def testHashPerf( self ):
277277
with GafferTest.TestRunner.PerformanceScope():
278278
filterResults["out"].hash()
279279

280+
@unittest.skipIf( GafferTest.inCI(), "Performance not relevant on CI platform" )
281+
@GafferTest.TestRunner.PerformanceTestMethod()
282+
def testComputePerformance( self ):
283+
284+
sphere = GafferScene.Sphere()
285+
286+
duplicate = GafferScene.Duplicate()
287+
duplicate["in"].setInput( sphere["out"] )
288+
duplicate["target"].setValue( '/sphere' )
289+
duplicate["copies"].setValue( 1000000 )
290+
291+
pathFilter = GafferScene.PathFilter()
292+
pathFilter["paths"].setValue( IECore.StringVectorData( [ '...' ] ) )
293+
294+
filterResults = GafferScene.FilterResults()
295+
filterResults["scene"].setInput( duplicate["out"] )
296+
filterResults["filter"].setInput( pathFilter["out"] )
297+
298+
# Evaluate the root childNames beforehand to focus our timing on the compute
299+
duplicate["out"].childNames( "/" )
300+
301+
with GafferTest.TestRunner.PerformanceScope():
302+
filterResults["out"].getValue()
303+
280304
if __name__ == "__main__":
281305
unittest.main()

0 commit comments

Comments
 (0)