diff --git a/src/xrCore/Math/MathUtil.cpp b/src/xrCore/Math/MathUtil.cpp index 5cc896def34..e62544f6f72 100644 --- a/src/xrCore/Math/MathUtil.cpp +++ b/src/xrCore/Math/MathUtil.cpp @@ -12,8 +12,11 @@ #include "xrEngine/Render.h" #include "Layers/xrRender/light.h" #endif - +#ifdef XR_X86 #include "SkinXW_SSE.hpp" +#else +#include "SkinXW_CPP.hpp" +#endif #include "Skin4W_MT.hpp" #include "PLC_SSE.hpp" #include "_math.h" @@ -22,12 +25,11 @@ namespace XRay { namespace Math { -#ifdef XR_X86 Skin1WFunc Skin1W; Skin2WFunc Skin2W; Skin3WFunc Skin3W; Skin4WFunc Skin4W; -#endif + PLCCalcFunc PLCCalc; void Initialize() @@ -41,12 +43,18 @@ void Initialize() Skin3W = Skin3W_SSE; Skin4W = Skin4W_SSE; Skin4W_MTs = Skin4W_SSE; +#else + Skin1W = Skin1W_CPP; + Skin2W = Skin2W_CPP; + Skin3W = Skin3W_CPP; + Skin4W = Skin4W_CPP; + Skin4W_MTs = Skin4W_CPP; #endif PLCCalc = PLCCalc_SSE; -#ifdef XR_X86 + if (ttapi_GetWorkerCount() > 1) Skin4W = Skin4W_MT; -#endif + initialized = true; } diff --git a/src/xrCore/Math/MathUtil.hpp b/src/xrCore/Math/MathUtil.hpp index 4a64fef5d46..fa770ee01ae 100644 --- a/src/xrCore/Math/MathUtil.hpp +++ b/src/xrCore/Math/MathUtil.hpp @@ -23,13 +23,11 @@ namespace XRay { namespace Math { -#ifdef XR_X86 extern XRCORE_API Skin1WFunc Skin1W; extern XRCORE_API Skin2WFunc Skin2W; extern XRCORE_API Skin3WFunc Skin3W; extern XRCORE_API Skin4WFunc Skin4W; extern XRCORE_API PLCCalcFunc PLCCalc; -#endif void XRCORE_API Initialize(); } // namespace Math diff --git a/src/xrCore/Math/SkinXW_CPP.cpp b/src/xrCore/Math/SkinXW_CPP.cpp new file mode 100644 index 00000000000..13c7b0c4d0c --- /dev/null +++ b/src/xrCore/Math/SkinXW_CPP.cpp @@ -0,0 +1,249 @@ +#include "stdafx.h" +#include "Common/Platform.hpp" +#include "SkinXW_CPP.hpp" +#ifdef _EDITOR +#include "SkeletonX.h" +#include "SkeletonCustom.h" +#else +#include "Animation/Bone.hpp" +#include "Layers/xrRender/SkeletonXVertRender.h" +#endif + +namespace XRay +{ +namespace Math +{ +void Skin1W_CPP(vertRender* D, vertBoned1W* S, u32 vCount, CBoneInstance* Bones) +{ + // return; + // Prepare + int U_Count = vCount / 8; + vertBoned1W* V = S; + vertBoned1W* E = V + U_Count * 8; + + // Unrolled loop + for (; S != E;) + { + Fmatrix& M0 = Bones[S->matrix].mRenderTransform; + M0.transform_tiny(D->P, S->P); + M0.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M1 = Bones[S->matrix].mRenderTransform; + M1.transform_tiny(D->P, S->P); + M1.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M2 = Bones[S->matrix].mRenderTransform; + M2.transform_tiny(D->P, S->P); + M2.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M3 = Bones[S->matrix].mRenderTransform; + M3.transform_tiny(D->P, S->P); + M3.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M4 = Bones[S->matrix].mRenderTransform; + M4.transform_tiny(D->P, S->P); + M4.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M5 = Bones[S->matrix].mRenderTransform; + M5.transform_tiny(D->P, S->P); + M5.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M6 = Bones[S->matrix].mRenderTransform; + M6.transform_tiny(D->P, S->P); + M6.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + + Fmatrix& M7 = Bones[S->matrix].mRenderTransform; + M7.transform_tiny(D->P, S->P); + M7.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + } + + // The end part + vertBoned1W* E2 = V + vCount; + for (; S != E2;) + { + Fmatrix& M = Bones[S->matrix].mRenderTransform; + M.transform_tiny(D->P, S->P); + M.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + S++; + D++; + } +} + +void Skin2W_CPP(vertRender* D, vertBoned2W* S, u32 vCount, CBoneInstance* Bones) +{ + // Prepare + int U_Count = vCount; + vertBoned2W* V = S; + vertBoned2W* E = V + U_Count; + Fvector P0, N0, P1, N1; + + // NON-Unrolled loop + for (; S != E;) + { + if (S->matrix1 != S->matrix0) + { + Fmatrix& M0 = Bones[S->matrix0].mRenderTransform; + Fmatrix& M1 = Bones[S->matrix1].mRenderTransform; + M0.transform_tiny(P0, S->P); + M0.transform_dir(N0, S->N); + M1.transform_tiny(P1, S->P); + M1.transform_dir(N1, S->N); + D->P.lerp(P0, P1, S->w); + D->N.lerp(N0, N1, S->w); + D->u = S->u; + D->v = S->v; + } + else + { + Fmatrix& M0 = Bones[S->matrix0].mRenderTransform; + M0.transform_tiny(D->P, S->P); + M0.transform_dir(D->N, S->N); + D->u = S->u; + D->v = S->v; + } + S++; + D++; + } +} + + +void Skin3W_CPP(vertRender* D, vertBoned3W* S, u32 vCount, CBoneInstance* Bones) +{ + // Prepare + int U_Count = vCount; + vertBoned3W* V = S; + vertBoned3W* E = V + U_Count; + Fvector P0, N0, P1, N1, P2, N2; + + // NON-Unrolled loop + for (; S != E;) + { + Fmatrix& M0 = Bones[S->m[0]].mRenderTransform; + Fmatrix& M1 = Bones[S->m[1]].mRenderTransform; + Fmatrix& M2 = Bones[S->m[2]].mRenderTransform; + + M0.transform_tiny(P0, S->P); + P0.mul(S->w[0]); + M0.transform_dir(N0, S->N); + N0.mul(S->w[0]); + + M1.transform_tiny(P1, S->P); + P1.mul(S->w[1]); + M1.transform_dir(N1, S->N); + N1.mul(S->w[1]); + + M2.transform_tiny(P2, S->P); + P2.mul(1.0f - S->w[0] - S->w[1]); + M2.transform_dir(N2, S->N); + N2.mul(1.0f - S->w[0] - S->w[1]); + + P0.add(P1); + P0.add(P2); + + D->P = P0; + + N0.add(N1); + N0.add(N2); + + D->N = N0; + + D->u = S->u; + D->v = S->v; + + S++; + D++; + } +} + + +void Skin4W_CPP(vertRender* D, vertBoned4W* S, u32 vCount, CBoneInstance* Bones) +{ + // Prepare + int U_Count = vCount; + vertBoned4W* V = S; + vertBoned4W* E = V + U_Count; + Fvector P0, N0, P1, N1, P2, N2, P3, N3; + + // NON-Unrolled loop + for (; S != E;) + { + Fmatrix& M0 = Bones[S->m[0]].mRenderTransform; + Fmatrix& M1 = Bones[S->m[1]].mRenderTransform; + Fmatrix& M2 = Bones[S->m[2]].mRenderTransform; + Fmatrix& M3 = Bones[S->m[3]].mRenderTransform; + + M0.transform_tiny(P0, S->P); + P0.mul(S->w[0]); + M0.transform_dir(N0, S->N); + N0.mul(S->w[0]); + + M1.transform_tiny(P1, S->P); + P1.mul(S->w[1]); + M1.transform_dir(N1, S->N); + N1.mul(S->w[1]); + + M2.transform_tiny(P2, S->P); + P2.mul(S->w[2]); + M2.transform_dir(N2, S->N); + N2.mul(S->w[2]); + + M3.transform_tiny(P3, S->P); + P3.mul(1.0f - S->w[0] - S->w[1] - S->w[2]); + M3.transform_dir(N3, S->N); + N3.mul(1.0f - S->w[0] - S->w[1] - S->w[2]); + + P0.add(P1); + P0.add(P2); + P0.add(P3); + + D->P = P0; + + N0.add(N1); + N0.add(N2); + N0.add(N3); + + D->N = N0; + + D->u = S->u; + D->v = S->v; + + S++; + D++; + } +} +} // namespace Math +} // namespace XRay diff --git a/src/xrCore/Math/SkinXW_CPP.hpp b/src/xrCore/Math/SkinXW_CPP.hpp new file mode 100644 index 00000000000..d956b22de0a --- /dev/null +++ b/src/xrCore/Math/SkinXW_CPP.hpp @@ -0,0 +1,20 @@ +#pragma once +#include "xrCore.h" + +struct vertRender; +struct vertBoned1W; +struct vertBoned2W; +struct vertBoned3W; +struct vertBoned4W; +class CBoneInstance; + +namespace XRay +{ +namespace Math +{ +void Skin1W_CPP(vertRender* D, vertBoned1W* S, u32 vCount, CBoneInstance* Bones); +void Skin2W_CPP(vertRender* D, vertBoned2W* S, u32 vCount, CBoneInstance* Bones); +void Skin3W_CPP(vertRender* D, vertBoned3W* S, u32 vCount, CBoneInstance* Bones); +void Skin4W_CPP(vertRender* D, vertBoned4W* S, u32 vCount, CBoneInstance* Bones); +} // namespace Math +} // namespace XRay diff --git a/src/xrCore/Math/SkinXW_SSE.cpp b/src/xrCore/Math/SkinXW_SSE.cpp index 088c7c510f0..bafaa69d7e1 100644 --- a/src/xrCore/Math/SkinXW_SSE.cpp +++ b/src/xrCore/Math/SkinXW_SSE.cpp @@ -411,5 +411,5 @@ void Skin1W_SSE(vertRender* D, vertBoned1W* S, u32 vCount, CBoneInstance* Bones) } } #endif -} // namespace Util3D +} // namespace Math } // namespace XRay diff --git a/src/xrCore/xrCore.vcxproj b/src/xrCore/xrCore.vcxproj index e9feb59d2f4..54befb80532 100644 --- a/src/xrCore/xrCore.vcxproj +++ b/src/xrCore/xrCore.vcxproj @@ -334,6 +334,7 @@ + @@ -503,6 +504,7 @@ + diff --git a/src/xrCore/xrCore.vcxproj.filters b/src/xrCore/xrCore.vcxproj.filters index 9fd2dd065cb..3a199620a6b 100644 --- a/src/xrCore/xrCore.vcxproj.filters +++ b/src/xrCore/xrCore.vcxproj.filters @@ -357,6 +357,9 @@ PCH + + Math + @@ -752,6 +755,9 @@ Math\Extensions + + Math +