From e54455ee31d5c494eefeb3f3db7a066e86cdde96 Mon Sep 17 00:00:00 2001 From: THoehne <77296181+THoehne@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:32:55 +0200 Subject: [PATCH] SIMD improvements --- .../Core/public/Math/Detail/Vector2Decl.inl | 82 ++++- .../Runtime/Core/public/Math/Vector2.hpp | 294 +++++++----------- .../Runtime/Core/public/Math/Vector2.inl | 73 +++++ 3 files changed, 264 insertions(+), 185 deletions(-) diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl index c19a769..718f058 100644 --- a/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl +++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl @@ -28,6 +28,26 @@ namespace Phanes::Core::Math::Detail struct compute_vec2_dec {}; + // Magnitude + template + struct compute_vec2_mag {}; + + // Dot product + template + struct compute_vec2_dotp {}; + + // Max + template + struct compute_vec2_max {}; + + // Min + template + struct compute_vec2_min {}; + + // Set + template + struct compute_vec2_set {}; + template struct construct_vec2 @@ -97,6 +117,12 @@ namespace Phanes::Core::Math::Detail r.x = v1.x - s; r.y = v1.y - s; } + + static constexpr void map(Phanes::Core::Math::TVector2& r, T s, const Phanes::Core::Math::TVector2& v1) + { + r.x = s - v1.x; + r.y = s - v1.y; + } }; @@ -137,6 +163,12 @@ namespace Phanes::Core::Math::Detail r.x = v1.x * s; r.y = v1.y * s; } + + static constexpr void map(Phanes::Core::Math::TVector2& r, T s, const Phanes::Core::Math::TVector2& v1) + { + r.x = s / v1.x; + r.y = s / v1.y; + } }; template @@ -182,5 +214,53 @@ namespace Phanes::Core::Math::Detail r.y = v1.y - 1; } }; -} + template + struct compute_vec2_mag + { + static constexpr T map(const Phanes::Core::Math::TVector2& v1) + { + return sqrt(v1.x * v1.x + v1.y * v1.y); + } + }; + + template + struct compute_vec2_dotp + { + static constexpr T map(const Phanes::Core::Math::TVector2& v1, const 
Phanes::Core::Math::TVector2& v2) + { + return v1.x * v2.x + v1.y * v2.y; + } + }; + + + template + struct compute_vec2_max + { + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = (v1.x > v2.x) ? v1.x : v2.x; + r.y = (v1.y > v2.y) ? v1.y : v2.y; + } + }; + + template + struct compute_vec2_min + { + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = (v1.x < v2.x) ? v1.x : v2.x; + r.y = (v1.y < v2.y) ? v1.y : v2.y; + } + }; + + template + struct compute_vec2_set + { + static constexpr void map(Phanes::Core::Math::TVector2& v1, T x, T y) + { + v1.x = x; + v1.y = y; + } + }; +} diff --git a/Engine/Source/Runtime/Core/public/Math/Vector2.hpp b/Engine/Source/Runtime/Core/public/Math/Vector2.hpp index 4c5bacf..3aaa6ae 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector2.hpp +++ b/Engine/Source/Runtime/Core/public/Math/Vector2.hpp @@ -273,10 +273,12 @@ namespace Phanes::Core::Math { template - inline TVector2 operator/ (T s, const TVector2& v1) - { - return v1 / s; - } + inline TVector2 operator/ (T s, const TVector2& v1); + + + + template + inline TVector2 operator- (T s, const TVector2& v1); /** * Componentwise addition of Vector with floating point. 
@@ -378,11 +380,8 @@ namespace Phanes::Core::Math { * @return Size of Vector */ - template - T Magnitude(const TVector2& v1) - { - return sqrtf(v1.x * v1.x + v1.y * v1.y); - } + template + T Magnitude(const TVector2& v1); /** * @see [FUNC]Magnitude @@ -398,11 +397,8 @@ namespace Phanes::Core::Math { * @return Magnitude without calculating square root */ - template - T SqrMagnitude(const TVector2& v1) - { - return v1.x * v1.x + v1.y * v1.y; - } + template + T SqrMagnitude(const TVector2& v1); /** * @see [FUNC]SqrMagnitude @@ -416,10 +412,10 @@ namespace Phanes::Core::Math { * @param(v1) Vector */ - template - TVector2 NormalizeV(TVector2& v1) + template + TVector2& NormalizeV(TVector2& v1) { - float vecNorm = Magnitude(v1); + T vecNorm = Magnitude(v1); v1 /= (vecNorm < P_FLT_INAC) ? (T)1.0 : vecNorm; return v1; } @@ -432,12 +428,10 @@ namespace Phanes::Core::Math { * @note Does not look for zero vector. */ - template - TVector2 UnsafeNormalizeV(TVector2& v1) + template + TVector2& UnsafeNormalizeV(TVector2& v1) { - v1 /= Magnitude(v1); - - return v1; + return (v1 /= Magnitude(v1)); } /** @@ -447,8 +441,8 @@ namespace Phanes::Core::Math { * @param(v2) Vector two */ - template - T Angle(const TVector2& v1, const TVector2& v2) + template + T Angle(const TVector2& v1, const TVector2& v2) { return acos(DotP(v1, v2) / (Magnitude(v1) * Magnitude(v2))); } @@ -460,8 +454,8 @@ namespace Phanes::Core::Math { * @param(v2) Vector two */ - template - T CosineAngle(const TVector2& v1, const TVector2& v2) + template + T CosineAngle(const TVector2& v1, const TVector2& v2) { return DotP(v1, v2) / (Magnitude(v1) * Magnitude(v2)); } @@ -472,8 +466,8 @@ namespace Phanes::Core::Math { * @param(v1) Vector one */ - template - TVector2 SignVectorV(TVector2& v1) + template + TVector2& SignVectorV(TVector2& v1) { v1.x = (v1.x >= (T)0.0) ? (T)1.0 : -(T)1.0; v1.y = (v1.y >= (T)0.0) ? 
(T)1.0 : -(T)1.0; @@ -488,10 +482,10 @@ namespace Phanes::Core::Math { * @param(radius) Radius of square (=> Distance from middle to center of each site.) */ - template - TVector2 BindToSquareV(TVector2& v1, T radius) + template + TVector2& BindToSquareV(TVector2& v1, T radius) { - float k = (abs(v1.x) > abs(v1.y)) ? abs(radius / v1.x) : abs(radius / v1.y); + T k = (Abs(v1.x) > Abs(v1.y)) ? Abs(radius / v1.x) : Abs(radius / v1.y); v1 *= k; return v1; @@ -504,11 +498,11 @@ namespace Phanes::Core::Math { * @param(radius) Radius of square (=> Distance from middle to center of each site.) */ - template - TVector2 ClampToSquareV(TVector2& v1, T radius) + template + TVector2& ClampToSquareV(TVector2& v1, T radius) { - float prime = (abs(v1.x) > abs(v1.y)) ? v1.x : v1.y; - float k = (prime > radius) ? abs(radius / prime) : 1.0f; + T prime = (Abs(v1.x) > Abs(v1.y)) ? v1.x : v1.y; + T k = (Abs(prime) > radius) ? Abs(radius / prime) : (T)1.0; v1 *= k; return v1; @@ -521,11 +515,8 @@ namespace Phanes::Core::Math { * @param(v2) Vector two */ - template - inline T DotP(const TVector2& v1, const TVector2& v2) - { - return v1.x * v2.x + v1.y * v2.y; - } + template + inline T DotP(const TVector2& v1, const TVector2& v2); /** * Creates Vector, with component wise largest values. 
@@ -554,14 +539,8 @@ namespace Phanes::Core::Math { * @note Stores new Vector to v1 */ - template - TVector2 MinV(TVector2& v1, const TVector2& v2) - { - v1.x = Phanes::Core::Math::Min(v1.x, v2.x); - v1.y = Phanes::Core::Math::Min(v1.y, v2.y); - - return v1; - } + template + TVector2& MinV(TVector2& v1, const TVector2& v2); /** * Gets perpendicular Vector to v1. @@ -572,13 +551,9 @@ namespace Phanes::Core::Math { */ template - TVector2 GetPerpendicularV(TVector2& v1) + TVector2& GetPerpendicularV(TVector2& v1) { - T x = -v1.x; - v1.x = v1.y; - v1.y = x; - - return v1; + return Set(v1, v1.y, -v1.x); } /** @@ -592,7 +567,7 @@ namespace Phanes::Core::Math { */ template - TVector2 GetReversePerpendicularV(TVector2& v1) + TVector2& GetReversePerpendicularV(TVector2& v1) { T x = v1.x; v1.x = -v1.y; @@ -601,24 +576,6 @@ namespace Phanes::Core::Math { return v1; } - /** - * Component wise multiplication of Vector - * - * @param(v1) Vector one - * @param(v2) Vector two - * - * @note Stores new Vector to v1 - */ - - template - TVector2 ScaleV(TVector2& v1, const TVector2& v2) - { - v1.x *= v2.x; - v1.y *= v2.y; - - return v1; - } - /** * Componentwise inverse of Vector * @@ -627,13 +584,10 @@ namespace Phanes::Core::Math { * @note Stores new Vector to v1 */ - template - TVector2 CompInverseV(TVector2& v1) + template + TVector2& CompInverseV(TVector2& v1) { - v1.x = 1.0f / v1.x; - v1.y = 1.0f / v1.y; - - return v1; + return (v1 = (T)1.0 / v1); } /** @@ -645,8 +599,8 @@ namespace Phanes::Core::Math { * @note Stores new Vector to v1 */ - template - TVector2 ReflectV(TVector2& v1, const TVector2& normal) + template + TVector2& ReflectV(TVector2& v1, const TVector2& normal) { v1 = ((T)2.0 * DotP(v1, normal) * normal) - v1; @@ -660,8 +614,8 @@ namespace Phanes::Core::Math { * @param(v2) Vector to copy */ - template - TVector2 Set(TVector2& v1, const TVector2& v2) + template + TVector2& Set(TVector2& v1, const TVector2& v2) { v1 = v2; @@ -670,19 +624,15 @@ namespace 
Phanes::Core::Math { /** * Sets components of a vector. + * + * Automatically used _mm_setr_ps, if vector is xmm register. * * @param(v1) Vector to copy to * @param(v2) Vector to copy */ - template - TVector2 Set(TVector2& v1, T x, T y) - { - v1.x = x; - v1.y = y; - - return v1; - } + template + TVector2& Set(TVector2& v1, T x, T y); /** * Anti-clockwise vector rotation. @@ -692,11 +642,11 @@ namespace Phanes::Core::Math { * @note Angle is not clamped */ - template - TVector2 RotateV(TVector2& v1, T angle) + template + TVector2& RotateV(TVector2& v1, T angle) { - float sinAngle = sin(angle); - float cosAngle = cos(angle); + T sinAngle = sin(angle); + T cosAngle = cos(angle); Set(v1, v1.x * cosAngle - v1.y * sinAngle, @@ -715,8 +665,8 @@ namespace Phanes::Core::Math { * @note Angle is not clamped */ - template - FORCEINLINE TVector2 ClockwiseRotateV(TVector2& v1, T angle) + template + FORCEINLINE TVector2& ClockwiseRotateV(TVector2& v1, T angle) { RotateV(v1, -angle); @@ -730,12 +680,9 @@ namespace Phanes::Core::Math { */ template - TVector2 NegateV(TVector2& v1) + TVector2& NegateV(TVector2& v1) { - v1.x = -v1.x; - v1.y = -v1.y; - - return v1; + return Set(v1, -v1.x, -v1.y); } /** @@ -747,10 +694,10 @@ namespace Phanes::Core::Math { * @return true if unit vector, false if not */ - template - inline bool IsNormalized(const TVector2& v1, T threshold = P_FLT_INAC) + template + inline bool IsNormalized(const TVector2& v1, T threshold = P_FLT_INAC) { - return (abs(SqrMagnitude(v1) - 1) < threshold); + return (Abs(SqrMagnitude(v1) - 1) < threshold); } /** @@ -765,10 +712,10 @@ namespace Phanes::Core::Math { * @note Requires v1 and v2 to be normal vectors. 
*/ - template - inline bool IsPerpendicular(const TVector2& v1, const TVector2& v2, T threshold = P_FLT_INAC) + template + inline bool IsPerpendicular(const TVector2& v1, const TVector2& v2, T threshold = P_FLT_INAC) { - return (abs(DotP(v1, v2)) < threshold); + return (Abs(DotP(v1, v2)) < threshold); } /** @@ -783,10 +730,10 @@ namespace Phanes::Core::Math { * @note Requires v1 and v2 to be normal vectors. */ - template - inline bool IsParallel(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) + template + inline bool IsParallel(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) { - return (abs(DotP(v1, v2)) > threshold); + return (Abs(DotP(v1, v2)) > threshold); } /** @@ -801,8 +748,8 @@ namespace Phanes::Core::Math { * @note Requires v1 and v2 to be normal vectors. */ - template - inline bool IsCoincident(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) + template + inline bool IsCoincident(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) { return (DotP(v1, v2) > threshold); } @@ -834,27 +781,12 @@ namespace Phanes::Core::Math { * @return Reflected vector */ - template - TVector2 Reflect(const TVector2& v1, const TVector2& normal) + template + TVector2 Reflect(const TVector2& v1, const TVector2& normal) { return (((T)2.0 * DotP(v1, normal) * normal) - v1); } - /** - * Scales a vector component wise - * - * @param(v1) Vector one - * @param(v2) Vector two - * - * @return Reflected vector - */ - - template - TVector2 Scale(const TVector2& v1, const TVector2& v2) - { - return TVector2(v1.x * v2.x, v1.y * v2.y); - } - /** * Componentwise inverse of a vector * @@ -863,10 +795,10 @@ namespace Phanes::Core::Math { * @return Componentwise inverted vector */ - template - TVector2 CompInverse(const TVector2& v1) + template + TVector2 CompInverse(const TVector2& v1) { - return TVector2(1.0f / v1.x, 1.0f / v1.y); + return ((T)1.0 / v1); } /** @@ -877,10 +809,10 @@ namespace 
Phanes::Core::Math { * @return Componentwise inverted vector */ - template - TVector2 Negate(const TVector2& v1) + template + TVector2 Negate(const TVector2& v1) { - return TVector2(-v1.x, -v1.y); + return ((T)0.0 - v1); } /** @@ -891,10 +823,10 @@ namespace Phanes::Core::Math { * @return Perpendicular vector */ - template - TVector2 GetPerpendicular(const TVector2& v1) + template + TVector2 GetPerpendicular(const TVector2& v1) { - return TVector2(v1.y, -v1.x); + return TVector2(v1.y, -v1.x); } /** @@ -905,10 +837,10 @@ namespace Phanes::Core::Math { * @return Reversed perpendicular vector */ - template - TVector2 GetReversePerpendicular(const TVector2& v1) + template + TVector2 GetReversePerpendicular(const TVector2& v1) { - return TVector2(-v1.y, v1.x); + return TVector2(-v1.y, v1.x); } /** @@ -920,11 +852,8 @@ namespace Phanes::Core::Math { * @return Minimal vector */ - template - TVector2 Min(const TVector2& v1, const TVector2& v2) - { - return TVector2(Phanes::Core::Math::Min(v1.x, v2.x), Phanes::Core::Math::Min(v1.y, v2.y)); - } + template + TVector2 Min(const TVector2& v1, const TVector2& v2); /** * Creates a new Vector by the component wise maxima of both vectors @@ -935,11 +864,8 @@ namespace Phanes::Core::Math { * @return Maximal vector */ - template - TVector2 Max(const TVector2& v1, const TVector2& v2) - { - return TVector2(Phanes::Core::Math::Max(v1.x, v2.x), Phanes::Core::Math::Max(v1.y, v2.y)); - } + template + TVector2 Max(const TVector2& v1, const TVector2& v2); /** * Creates a normalized instance of the vector @@ -949,10 +875,10 @@ namespace Phanes::Core::Math { * @return Unit vector */ - template - TVector2 Normalize(const TVector2& v1) + template + TVector2 Normalize(const TVector2& v1) { - float vecNorm = Magnitude(v1); + T vecNorm = Magnitude(v1); return (vecNorm < P_FLT_INAC) ? 
v1 : (v1 / vecNorm); } @@ -965,8 +891,8 @@ namespace Phanes::Core::Math { * @note Does not test for zero vector */ - template - TVector2 UnsafeNormalize(const TVector2& v1) + template + TVector2 UnsafeNormalize(const TVector2& v1) { return (v1 / Magnitude(v1)); } @@ -979,10 +905,10 @@ namespace Phanes::Core::Math { * @return Vector with signs as components */ - template - TVector2 SignVector(const TVector2& v1) + template + TVector2 SignVector(const TVector2& v1) { - return TVector2((v1.x >= (T)0.0) ? (T)1.0 : (T)-1.0, (v1.y >= (T)0.0) ? (T)1.0 : (T)-1.0); + return TVector2((v1.x >= (T)0.0) ? (T)1.0 : (T)-1.0, (v1.y >= (T)0.0) ? (T)1.0 : (T)-1.0); } /** @@ -994,10 +920,10 @@ namespace Phanes::Core::Math { * @return Bound vector */ - template - TVector2 BindToSquare(const TVector2& v1, T radius) + template + TVector2 BindToSquare(const TVector2& v1, T radius) { - float k = (abs(v1.x) > abs(v1.y)) ? abs(radius / v1.x) : abs(radius / v1.y); + T k = (Abs(v1.x) > Abs(v1.y)) ? Abs(radius / v1.x) : Abs(radius / v1.y); return v1 * k; } @@ -1010,11 +936,11 @@ namespace Phanes::Core::Math { * @return Clamped vector. If the length of the vector fits the square, then the vector is returned. */ - template - TVector2 ClampToSquare(const TVector2& v1, T radius) + template + TVector2 ClampToSquare(const TVector2& v1, T radius) { - float prime = (abs(v1.x) > abs(v1.y)) ? v1.x : v1.y; - float k = (prime > radius) ? abs(radius / prime) : 1.0f; + T prime = (Abs(v1.x) > Abs(v1.y)) ? v1.x : v1.y; + T k = (Abs(prime) > radius) ? Abs(radius / prime) : (T)1.0; return v1 * k; } @@ -1031,8 +957,8 @@ namespace Phanes::Core::Math { * @note Interpolation is clamped between 0 - 1. */ - template - TVector2 Lerp(const TVector2& startVec, const TVector2& destVec, T t) + template + TVector2 Lerp(const TVector2& startVec, const TVector2& destVec, T t) { t = Phanes::Core::Math::Clamp(t, (T)0.0, (T)1.0); @@ -1051,8 +977,8 @@ namespace Phanes::Core::Math { * @note Interpolation is not clamped. 
Make shure t is between 0.0f and 1.0f */ - template - TVector2 LerpUnclamped(const TVector2& startVec, const TVector2& destVec, T t) + template + TVector2 LerpUnclamped(const TVector2& startVec, const TVector2& destVec, T t) { return (t * destVec) + ((1 - t) * startVec); } @@ -1068,13 +994,13 @@ namespace Phanes::Core::Math { * @note Angle is not clamped */ - template - TVector2 Rotate(const TVector2& v1, T angle) + template + TVector2 Rotate(const TVector2& v1, T angle) { - float sinAngle = sin(angle); - float cosAngle = cos(angle); + T sinAngle = sin(angle); + T cosAngle = cos(angle); - return TVector2(v1.x * cosAngle - v1.y * sinAngle, + return TVector2(v1.x * cosAngle - v1.y * sinAngle, v1.y * cosAngle + v1.x * sinAngle); } @@ -1088,8 +1014,8 @@ namespace Phanes::Core::Math { * @note Angle is not clamped */ - template - TVector2 ClockwiseRotate(const TVector2& v1, T angle) + template + TVector2 ClockwiseRotate(const TVector2& v1, T angle) { return Rotate(v1, -angle); } diff --git a/Engine/Source/Runtime/Core/public/Math/Vector2.inl b/Engine/Source/Runtime/Core/public/Math/Vector2.inl index fd8b358..31ac958 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector2.inl +++ b/Engine/Source/Runtime/Core/public/Math/Vector2.inl @@ -1,3 +1,7 @@ +/** + * Contains functions, that have separate simd equivalents. 
+ */ + #pragma once #include "Core/public/Math/Boilerplate.h" @@ -158,6 +162,22 @@ namespace Phanes::Core::Math return r; } + template + TVector2 operator/(T s, const TVector2& v1) + { + TVector2 r; + Detail::compute_vec2_div::map(r, s, v1); + return r; + } + + template + TVector2 operator-(T s, const TVector2& v1) + { + TVector2 r; + Detail::compute_vec2_sub::map(r, s, v1); + return r; + } + // Comparision template @@ -204,5 +224,58 @@ namespace Phanes::Core::Math } + template + T Magnitude(const TVector2& v1) + { + return Detail::compute_vec2_mag::map(v1); + } + template + T SqrMagnitude(const TVector2& v1) + { + return Detail::compute_vec2_dotp::map(v1, v1); + } + + template + inline T DotP(const TVector2& v1, const TVector2& v2) + { + return Detail::compute_vec2_dotp::map(v1, v2); + } + + template + TVector2& MaxV(TVector2& v1, const TVector2& v2) + { + Detail::compute_vec2_max::map(v1, v1, v2); + return v1; + } + + template + TVector2 Max(const TVector2& v1, const TVector2& v2) + { + TVector2 r; + Detail::compute_vec2_max::map(r, v1, v2); + return r; + } + + template + TVector2& MinV(TVector2& v1, const TVector2& v2) + { + Detail::compute_vec2_min::map(v1, v1, v2); + return v1; + } + + template + TVector2 Min(const TVector2& v1, const TVector2& v2) + { + TVector2 r; + Detail::compute_vec2_min::map(r, v1, v2); + return r; + } + + template + TVector2& Set(TVector2& v1, T x, T y) + { + Detail::compute_vec2_set::map(v1, x, y); + return v1; + } } \ No newline at end of file