tr_surface: parallel IQM model CPU runtime computation

illwieckz · illwieckz · commit 291d6a724fa0 · 2025-09-30T05:29:00.000+02:00
diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp
@@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 ===========================================================================
 */
 // tr_surface.c
+#include "framework/Omp.h"
 #include "tr_local.h"
 #include "gl_shader.h"
 #include "Material.h"
@@ -42,6 +43,9 @@ use the shader system.
 
 static transform_t bones[ MAX_BONES ];
 
+// Test cvar: when it reads false Tess_SurfaceIQM keeps the serial loop for the branch it guards; otherwise that branch is dispatched through Omp::Tasker.
+Cvar::Cvar<bool> r_parallelNotbntoq( "r_parallelNotbntoq", "r_parallelNotbntoq", Cvar::NONE, true ); // NOTE(review): second string repeats the name; if that slot is the description it should explain the cvar — confirm
+
 /*
 ==============
 Tess_EndBegin
@@ -1242,6 +1246,10 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) {
 	shaderVertex_t *tessVertex = tess.verts + tess.numVertexes;
 	shaderVertex_t *lastVertex = tessVertex + surf->num_vertexes;
 
+	size_t num_chunks = Omp::GetThreads(); // number of chunks the vertex range is split into; NOTE(review): used as a divisor below, assumes it is never 0 — confirm
+
+	size_t chunk_size = surf->num_vertexes / num_chunks; // integer division; the chunk lambdas let the last chunk run to lastVertex, so it absorbs the remainder
+
 	// Deform the vertices by the lerped bones.
 	if ( model->num_joints > 0 && model->blendWeights && model->blendIndexes )
 	{
@@ -1252,6 +1260,7 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) {
 			byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex;
 			byte *modelBlendWeight = model->blendWeights + 4 * firstVertex;
 
+if ( !r_parallelNotbntoq.Get() ) {
 			for ( ; tessVertex < lastVertex; tessVertex++,
 				modelPosition += 3, modelNormal += 3,
 				modelTangent += 3, modelBitangent += 3,
@@ -1280,71 +1289,166 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) {
 
 				Vector2Copy( modelTexcoord, tessVertex->texCoords );
 			}
+} else {
+			// Skins one contiguous chunk of vertices: weighted bone blend of the position
+			// plus a texcoord copy. Each chunk_index covers its own disjoint vertex range.
+			auto task = [&]( const size_t& chunk_index ) -> void
+			{
+				size_t chunk_offset = chunk_index * chunk_size;
+
+				shaderVertex_t *chunk_tessVertex = tessVertex + chunk_offset;
+				float *chunk_modelPosition = modelPosition + 3 * chunk_offset;
+				float *chunk_modelTexcoord = modelTexcoord + 2 * chunk_offset;
+				byte *chunk_modelBlendIndex = modelBlendIndex + 4 * chunk_offset;
+				byte *chunk_modelBlendWeight = modelBlendWeight + 4 * chunk_offset;
+
+				// The last chunk also takes the remainder left by the integer division.
+				shaderVertex_t *chunk_lastVertex =
+					chunk_index == num_chunks - 1
+					? lastVertex
+					: chunk_tessVertex + chunk_size;
+
+				for ( ; chunk_tessVertex < chunk_lastVertex;
+					chunk_tessVertex++,
+					chunk_modelPosition += 3,
+					chunk_modelTexcoord += 2 )
+				{
+					vec3_t position = {};
+
+					byte *chunk_lastBlendIndex = chunk_modelBlendIndex + 4;
+
+					// The blend pointers advance four entries per vertex here, not in the outer loop.
+					for ( ; chunk_modelBlendIndex < chunk_lastBlendIndex;
+						chunk_modelBlendIndex++,
+						chunk_modelBlendWeight++ )
+					{
+						if ( *chunk_modelBlendWeight == 0 )
+						{
+							continue;
+						}
+
+						float weight = *chunk_modelBlendWeight * weightFactor;
+						vec3_t tmp;
+
+						TransformPoint( &bones[ *chunk_modelBlendIndex ], chunk_modelPosition, tmp );
+						VectorMA( position, weight, tmp, position );
+					}
+
+					// Use the chunk cursor: the captured tessVertex never advances and is shared by all chunks.
+					VectorCopy( position, chunk_tessVertex->xyz );
+					Vector2Copy( chunk_modelTexcoord, chunk_tessVertex->texCoords );
+				}
+			};
+
+			Omp::Tasker( task, num_chunks );
+}
 		}
 		else
 		{
 			byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex;
 			byte *modelBlendWeight = model->blendWeights + 4 * firstVertex;
 
-			for ( ; tessVertex < lastVertex; tessVertex++,
-				modelPosition += 3, modelNormal += 3,
-				modelTangent += 3, modelBitangent += 3,
-				modelTexcoord += 2 )
+			auto task = [&]( const size_t& chunk_index ) -> void // skins one chunk: blends position/normal/tangent/bitangent by bone weight, then writes xyz, qtangents and texCoords
 			{
-				vec3_t position = {}, tangent = {}, binormal = {}, normal = {};
+				size_t chunk_offset = chunk_index * chunk_size; // index of this chunk's first vertex within the surface
+
+				shaderVertex_t *chunk_tessVertex = tessVertex + chunk_offset;
+				float *chunk_modelPosition = modelPosition + 3 * chunk_offset;
+				float *chunk_modelNormal = modelNormal + 3 * chunk_offset;
+				float *chunk_modelTangent = modelTangent + 3 * chunk_offset;
+				float *chunk_modelBitangent = modelBitangent + 3 * chunk_offset;
+				float *chunk_modelTexcoord = modelTexcoord + 2 * chunk_offset;
+				byte *chunk_modelBlendIndex = modelBlendIndex + 4 * chunk_offset;
+				byte *chunk_modelBlendWeight = modelBlendWeight + 4 * chunk_offset;
+
+				shaderVertex_t *chunk_lastVertex =
+					chunk_index == num_chunks - 1
+					? lastVertex
+					: chunk_tessVertex + chunk_size; // non-last chunks stop after chunk_size vertices; the last one runs to lastVertex
+
+				for ( ; chunk_tessVertex < chunk_lastVertex;
+					chunk_tessVertex++,
+					chunk_modelPosition += 3, chunk_modelNormal += 3,
+					chunk_modelTangent += 3, chunk_modelBitangent += 3,
+					chunk_modelTexcoord += 2 )
+				{
+					vec3_t position = {}, tangent = {}, binormal = {}, normal = {};
 
-				byte *lastBlendIndex = modelBlendIndex + 4;
+					byte *chunk_lastBlendIndex = chunk_modelBlendIndex + 4; // four blend entries per vertex; the inner loop is what advances the blend cursors
 
-				for ( ; modelBlendIndex < lastBlendIndex; modelBlendIndex++,
-					modelBlendWeight++ )
-				{
-					if ( *modelBlendWeight == 0 )
+					for ( ; chunk_modelBlendIndex < chunk_lastBlendIndex;
+						chunk_modelBlendIndex++,
+						chunk_modelBlendWeight++ )
 					{
-						continue;
-					}
+						if ( *chunk_modelBlendWeight == 0 ) // a zero weight contributes nothing, so skip the transforms
+						{
+							continue;
+						}
 
-					float weight = *modelBlendWeight * weightFactor;
-					vec3_t tmp;
+						float weight = *chunk_modelBlendWeight * weightFactor;
+						vec3_t tmp;
 
-					TransformPoint( &bones[ *modelBlendIndex ], modelPosition, tmp );
-					VectorMA( position, weight, tmp, position );
+						TransformPoint( &bones[ *chunk_modelBlendIndex ], chunk_modelPosition, tmp );
+						VectorMA( position, weight, tmp, position );
 
-					TransformNormalVector( &bones[ *modelBlendIndex ], modelNormal, tmp );
-					VectorMA( normal, weight, tmp, normal );
+						TransformNormalVector( &bones[ *chunk_modelBlendIndex ], chunk_modelNormal, tmp );
+						VectorMA( normal, weight, tmp, normal );
 
-					TransformNormalVector( &bones[ *modelBlendIndex ], modelTangent, tmp );
-					VectorMA( tangent, weight, tmp, tangent );
+						TransformNormalVector( &bones[ *chunk_modelBlendIndex ], chunk_modelTangent, tmp );
+						VectorMA( tangent, weight, tmp, tangent );
 
-					TransformNormalVector( &bones[ *modelBlendIndex ], modelBitangent, tmp );
-					VectorMA( binormal, weight, tmp, binormal );
-				}
+						TransformNormalVector( &bones[ *chunk_modelBlendIndex ], chunk_modelBitangent, tmp );
+						VectorMA( binormal, weight, tmp, binormal );
+					}
 
-				VectorNormalizeFast( normal );
-				VectorNormalizeFast( tangent );
-				VectorNormalizeFast( binormal );
-				VectorCopy( position, tessVertex->xyz );
+					VectorNormalizeFast( normal );
+					VectorNormalizeFast( tangent );
+					VectorNormalizeFast( binormal );
+					VectorCopy( position, chunk_tessVertex->xyz ); // all writes go through the chunk-local cursor, never the captured tessVertex
 
-				R_TBNtoQtangentsFast( tangent, binormal, normal, tessVertex->qtangents );
+					R_TBNtoQtangentsFast( tangent, binormal, normal, chunk_tessVertex->qtangents );
 
-				Vector2Copy( modelTexcoord, tessVertex->texCoords );
-			}
+					Vector2Copy( chunk_modelTexcoord, chunk_tessVertex->texCoords );
+				}
+			};
+
+			Omp::Tasker( task, num_chunks ); // presumably calls task once per chunk index in [0, num_chunks) — confirm in framework/Omp.h
 		}
 	}
 	else
 	{
 		float scale = model->internalScale * backEnd.currentEntity->e.skeleton.scale;
 
-		for ( ; tessVertex < lastVertex; tessVertex++,
-			modelPosition += 3, modelNormal += 3,
-			modelTangent += 3, modelBitangent += 3,
-			modelTexcoord += 2 )
+		auto task = [&]( const size_t& chunk_index  ) -> void // unskinned path, one chunk: scales positions, converts the model TBN to qtangents, copies texcoords
 		{
-			VectorScale( modelPosition, scale, tessVertex->xyz );
+			size_t chunk_offset = chunk_index * chunk_size; // index of this chunk's first vertex within the surface
+
+			shaderVertex_t *chunk_tessVertex = tessVertex + chunk_offset;
+			float *chunk_modelPosition = modelPosition + 3 * chunk_offset;
+			float *chunk_modelNormal = modelNormal + 3 * chunk_offset;
+			float *chunk_modelTangent = modelTangent + 3 * chunk_offset;
+			float *chunk_modelBitangent = modelBitangent + 3 * chunk_offset;
+			float *chunk_modelTexcoord = modelTexcoord + 2 * chunk_offset;
+
+			shaderVertex_t *chunk_lastVertex =
+				chunk_index == num_chunks - 1
+				? lastVertex
+				: chunk_tessVertex + chunk_size; // non-last chunks stop after chunk_size vertices; the last one runs to lastVertex
+
+			for ( ; chunk_tessVertex < chunk_lastVertex; chunk_tessVertex++,
+				chunk_modelPosition += 3, chunk_modelNormal += 3,
+				chunk_modelTangent += 3, chunk_modelBitangent += 3,
+				chunk_modelTexcoord += 2 )
+			{
+				VectorScale( chunk_modelPosition, scale, chunk_tessVertex->xyz ); // scale is model->internalScale times the entity skeleton scale
 
-			R_TBNtoQtangentsFast( modelTangent, modelBitangent, modelNormal, tessVertex->qtangents );
+				R_TBNtoQtangentsFast( chunk_modelTangent, chunk_modelBitangent, chunk_modelNormal, chunk_tessVertex->qtangents );
 
-			Vector2Copy( modelTexcoord, tessVertex->texCoords );
-		}
+				Vector2Copy( chunk_modelTexcoord, chunk_tessVertex->texCoords );
+			}
+		};
+
+		Omp::Tasker( task, num_chunks ); // presumably calls task once per chunk index in [0, num_chunks) — confirm in framework/Omp.h
 	}
 
 	tess.numIndexes  += numIndexes;