From 45a167564de44c486711a9e8e1ef3cd9af72fd48 Mon Sep 17 00:00:00 2001 From: scorpioblood <77296181+scorpioblood@users.noreply.github.com> Date: Tue, 4 Jun 2024 22:13:28 +0200 Subject: [PATCH] SIMD improvement. --- .../Core/public/Math/Detail/Vector2Decl.inl | 192 +++++++ .../Core/public/Math/Detail/Vector3Decl.inl | 12 +- .../Core/public/Math/Detail/Vector4Decl.inl | 9 +- .../Source/Runtime/Core/public/Math/Include.h | 5 +- .../Source/Runtime/Core/public/Math/MathFwd.h | 11 +- .../public/Math/SIMD/PhanesVectorMathFPU.hpp | 498 +++++++++++++++++- .../public/Math/SIMD/PhanesVectorMathSSE.hpp | 187 ++++++- .../Runtime/Core/public/Math/SIMD/Platform.h | 12 +- .../Runtime/Core/public/Math/Vector2.hpp | 253 ++++----- .../Runtime/Core/public/Math/Vector2.inl | 205 +++++++ .../Runtime/Core/public/Math/Vector3.hpp | 36 +- .../Runtime/Core/public/Math/Vector3.inl | 4 +- .../Runtime/Core/public/Math/Vector4.hpp | 2 +- .../Runtime/Core/public/Math/Vector4.inl | 2 +- 14 files changed, 1187 insertions(+), 241 deletions(-) create mode 100644 Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl create mode 100644 Engine/Source/Runtime/Core/public/Math/Vector2.inl diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl new file mode 100644 index 0000000..e530c9e --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl @@ -0,0 +1,192 @@ +#pragma once + +#include "Core/public/Math/Boilerplate.h" + +#include + +namespace Phanes::Core::Math::Detail +{ + template + struct construct_vec2 {}; + + template + struct compute_vec2_add {}; + + template + struct compute_vec2_sub {}; + + template + struct compute_vec2_mul {}; + + template + struct compute_vec2_div {}; + + template + struct compute_vec2_eq {}; + + template + struct compute_vec2_ieq {}; + + template + struct compute_vec2_inc {}; + + template + struct compute_vec2_dec {}; + + + + template + struct construct_vec2 + { 
+ template + static constexpr void map(Phanes::Core::Math::TVector2& v1, const TVector2& v2) + { + v1.x = v2.x; + v1.y = v2.y; + } + + template + static constexpr void map(Phanes::Core::Math::TVector2& v1, T s) + { + v1.x = s; + v1.y = s; + } + + template + static constexpr void map(Phanes::Core::Math::TVector2& v1, T x, T y) + { + v1.x = x; + v1.y = y; + } + + template + static constexpr void map(Phanes::Core::Math::TVector2& v1, const T* comp) + { + v1.x = comp[0]; + v1.y = comp[1]; + } + }; + + + template + struct compute_vec2_add + { + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x + v2.x; + r.y = v1.y + v2.y; + } + + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, T s) + { + r.x = v1.x + s; + r.y = v1.y + s; + } + }; + + + template + struct compute_vec2_sub + { + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x - v2.x; + r.y = v1.y - v2.y; + } + + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, T s) + { + r.x = v1.x - s; + r.y = v1.y - s; + } + }; + + + template + struct compute_vec2_mul + { + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x * v2.x; + r.y = v1.y * v2.y; + } + + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, T s) + { + r.x = v1.x * s; + r.y = v1.y * s; + } + }; + + + template + struct compute_vec2_div + { + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x / v2.x; + r.y = v1.y / v2.y; + } + + 
template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, T s) + { + s = (T)1.0 / s; + + r.x = v1.x * s; + r.y = v1.y * s; + } + }; + + template + struct compute_vec2_eq + { + template + static constexpr bool map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC); + } + }; + + template + struct compute_vec2_ieq + { + template + static constexpr bool map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.y - v2.y) > P_FLT_INAC); + } + }; + + template + struct compute_vec2_inc + { + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1) + { + r.x = v1.x + 1; + r.y = v1.y + 1; + } + }; + + template + struct compute_vec2_dec + { + template + static constexpr void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1) + { + r.x = v1.x - 1; + r.y = v1.y - 1; + } + }; +} + diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl index 8d1d810..6151cb6 100644 --- a/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl +++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl @@ -61,21 +61,13 @@ namespace Phanes::Core::Math::Detail v1.w = (T)0.0; } - /*static constexpr void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) + static constexpr void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, T s) { v1.x = v2.x; v1.y = v2.y; - v1.z = v3.x; - v1.w = v3.y; + v1.z = s; } - static constexpr void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, const 
Phanes::Core::Math::TVector2& v3) - { - v1.x = v2.x; - v1.y = v2.y; - v1.z = v3.x; - v1.w = v3.y; - }*/ static constexpr void map(Phanes::Core::Math::TVector3& v1, const T* comp) { diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl index 631ef4c..560d371 100644 --- a/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl +++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl @@ -61,7 +61,7 @@ namespace Phanes::Core::Math::Detail v1.w = w; } - /*static constexpr void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) + static constexpr void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) { v1.x = v2.x; v1.y = v2.y; @@ -69,13 +69,6 @@ namespace Phanes::Core::Math::Detail v1.w = v3.y; } - static constexpr void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) - { - v1.x = v2.x; - v1.y = v2.y; - v1.z = v3.x; - v1.w = v3.y; - }*/ static constexpr void map(Phanes::Core::Math::TVector4& v1, const T* comp) { diff --git a/Engine/Source/Runtime/Core/public/Math/Include.h b/Engine/Source/Runtime/Core/public/Math/Include.h index c0ad70c..dc655a5 100644 --- a/Engine/Source/Runtime/Core/public/Math/Include.h +++ b/Engine/Source/Runtime/Core/public/Math/Include.h @@ -1,4 +1,5 @@ #pragma once -#include "Core/public/Math/Vector3.hpp" -#include "Core/public/Math/Vector4.hpp" \ No newline at end of file +// --- Vectors ------------------------ + +#include "Core/public/Math/Vector2.hpp" // <-- Includes Vector3/4 automatically \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/MathFwd.h b/Engine/Source/Runtime/Core/public/Math/MathFwd.h index 33aae52..7c4a8aa 100644 --- a/Engine/Source/Runtime/Core/public/Math/MathFwd.h +++ 
b/Engine/Source/Runtime/Core/public/Math/MathFwd.h @@ -26,7 +26,6 @@ namespace Phanes::Core::Math { template struct TColor; template struct TLinearColor; - template struct TVector2; template struct TRay; template struct TLine; template struct TPlane; @@ -44,6 +43,7 @@ namespace Phanes::Core::Math { template struct TIntPoint2; template struct TIntPoint3; template struct TIntPoint4; + template struct TVector2; template struct TVector3; template struct TVector4; @@ -51,15 +51,6 @@ namespace Phanes::Core::Math { * Specific instantiation of forward declarations. */ - // TVector2 - typedef TVector2 Vector2; - typedef TVector2 Vector2d; - - typedef std::vector Vector2List; - typedef std::vector Vector2Listd; - - - // TIntVector2 typedef TIntVector2 IntVector2; typedef TIntVector2 IntVector2l; diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp index 1a370f1..94f7c74 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp @@ -1 +1,497 @@ -#pragma once \ No newline at end of file +#pragma once + +#include + +#include "Core/public/Math/SIMD/PhanesSIMDTypes.h" +#include "Core/public/Math/Boilerplate.h" +#include "Core/public/Math/MathCommon.hpp" + + +// Required includes +#include "Core/public/Math/Vector3.hpp" +#include "Core/public/Math/Vector4.hpp" + +// ========== // +// Common // +// ========== // + +namespace Phanes::Core::Math::SIMD +{ + /// + /// Adds all scalars of the vector. + /// + /// Vector + /// Sum stored in v[0:31]. + Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v) + { + Phanes::Core::Types::Vec4f32Reg r; + r.data[0] = (v.data[0] + v.data[1] + v.data[2] + v.data[3]); + return r; + } + + /// + /// Adds all scalars of the vector. + /// + /// Vector + /// Sum of components. 
+ float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v) + { + return (v.data[0] + v.data[1] + v.data[2] + v.data[3]); + } + + /// + /// Gets the absolute value of each scalar in the vector. + /// + /// Vector + /// Vector with all components positive. + Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v) + { + Phanes::Core::Types::Vec4f32Reg r; + r.data[0] = Phanes::Core::Math::Abs(v.data[0]); + r.data[1] = Phanes::Core::Math::Abs(v.data[1]); + r.data[2] = Phanes::Core::Math::Abs(v.data[2]); + r.data[3] = Phanes::Core::Math::Abs(v.data[3]); + + return r; + } + + /// + /// Gets the dot product of the + /// + /// + /// + /// + Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2) + { + Phanes::Core::Types::Vec4f32Reg r; + r.data[0] = (v1.data[0] * v2.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3]); + return r; + } + + /// + /// Gets the dot product of the + /// + /// + /// + /// + float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2) + { + return (v1.data[0] * v2.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3]); + } +} + + + + +// ============ // +// TVector4 // +// ============ // + + +namespace Phanes::Core::Math::Detail +{ + // Template class has already been defined and is included through: Storage.h -> Vector4.hpp -> SIMDIntrinsics.h -> PhanesVectorMathSEE.hpp + + + template<> + struct construct_vec4 + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const TVector4& v2) + { + v1.x = v2.x; + v1.y = v2.y; + v1.z = v2.z; + v1.w = v2.w; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, float s) + { + v1.x = s; + v1.y = s; + v1.z = s; + v1.w = s; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, float x, float y, float z, float w) + { + v1.x = x; + v1.y = y; + v1.z = z; + 
v1.w = w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) + { + v1.x = v2.x; + v1.y = v2.y; + v1.x = v3.x; + v1.y = v3.y; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const float* s) + { + v1.x = s[0]; + v1.y = s[1]; + v1.z = s[2]; + v1.w = s[3]; + + } + }; + + + template<> + struct compute_vec4_add + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x + v2.x; + r.y = v1.y + v2.y; + r.z = v1.z + v2.z; + r.w = v1.w + v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s) + { + r.x = v1.x + s; + r.y = v1.y + s; + r.z = v1.z + s; + r.w = v1.w + s; + } + }; + + template<> + struct compute_vec4_sub + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x - v2.x; + r.y = v1.y - v2.y; + r.z = v1.z - v2.z; + r.w = v1.w - v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s) + { + r.x = v1.x - s; + r.y = v1.y - s; + r.z = v1.z - s; + r.w = v1.w - s; + } + }; + + template<> + struct compute_vec4_mul + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x * v2.x; + r.y = v1.y * v2.y; + r.z = v1.z * v2.z; + r.w = v1.w * v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s) + { + r.x = v1.x * s; + r.y = v1.y * s; + r.z = v1.z * s; + r.w = v1.w * s; + } + }; + + template<> + struct compute_vec4_div + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const 
Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x / v2.x; + r.y = v1.y / v2.y; + r.z = v1.z / v2.z; + r.w = v1.w / v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s) + { + s = 1.0f / s; + + r.x = v1.x * s; + r.y = v1.y * s; + r.z = v1.z * s; + r.w = v1.w * s; + } + }; + + template<> + struct compute_vec4_inc + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1) + { + r.x = v1.x + 1; + r.y = v1.y + 1; + r.z = v1.z + 1; + r.w = v1.w + 1; + } + }; + + template<> + struct compute_vec4_dec + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1) + { + r.x = v1.x - 1; + r.y = v1.y - 1; + r.z = v1.z - 1; + r.w = v1.w - 1; + } + }; + + template<> + struct compute_vec4_eq + { + static FORCEINLINE bool map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.z - v2.z) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.w - v2.w) < P_FLT_INAC); + } + }; + + template<> + struct compute_vec4_ieq + { + static FORCEINLINE bool map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.y - v2.y) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.z - v2.z) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.w - v2.w) > P_FLT_INAC); + } + }; + + + // ============ // + // TVector3 // + // ============ // + + + template<> + struct construct_vec3 + { + static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const TVector3& v2) + { + v1.x = v2.x; + v1.y = v2.y; + v1.z = v2.z; + v1.w = 0.0f; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, float s) + { + v1.x = s; + v1.y = s; + v1.z = s; + v1.w = 0.0f; + } + + 
static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, float x, float y, float z) + { + v1.x = x; + v1.y = y; + v1.z = z; + v1.w = 0.0f; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, float s) + { + v1.x = v2.x; + v1.y = v2.y; + v1.z = s; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const float* comp) + { + v1.x = comp[0]; + v1.y = comp[1]; + v1.z = comp[2]; + v1.w = 0.0f; + + } + }; + + + + template<> struct compute_vec3_eq : public compute_vec4_eq + { + static FORCEINLINE bool map(Phanes::Core::Math::TVector3& v1, Phanes::Core::Math::TVector3& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.z - v2.z) < P_FLT_INAC); + } + }; + + template<> struct compute_vec3_ieq : public compute_vec4_ieq + { + static FORCEINLINE bool map(Phanes::Core::Math::TVector3& v1, Phanes::Core::Math::TVector3& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.y - v2.y) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.z - v2.z) > P_FLT_INAC); + } + }; + + + template<> struct compute_vec3_add : public compute_vec4_add {}; + template<> struct compute_vec3_sub : public compute_vec4_sub {}; + template<> struct compute_vec3_mul : public compute_vec4_mul {}; + template<> struct compute_vec3_div : public compute_vec4_div {}; + template<> struct compute_vec3_inc : public compute_vec4_inc {}; + template<> struct compute_vec3_dec : public compute_vec4_dec {}; + + // ============ // + // TVector2 // + // ============ // + + + template<> + struct construct_vec2 + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + v1.x = v2.x; + v1.y = v2.y; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double s) + { + v1.x = s; + v1.y = s; + } + + static FORCEINLINE void 
map(Phanes::Core::Math::TVector2& v1, double x, double y) + { + v1.x = x; + v1.y = y; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, const double* comp) + { + v1.x = comp[0]; + v1.y = comp[1]; + } + }; + + + template<> + struct compute_vec2_add + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x + v2.x; + r.y = v1.y + v2.y; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + r.x = v1.x + s; + r.y = v1.y + s; + } + }; + + template<> + struct compute_vec2_sub + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x - v2.x; + r.y = v1.y - v2.y; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + r.x = v1.x - s; + r.y = v1.y - s; + } + }; + + template<> + struct compute_vec2_mul + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x * v2.x; + r.y = v1.y * v2.y; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + r.x = v1.x * s; + r.y = v1.y * s; + } + }; + + template<> + struct compute_vec2_div + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.x = v1.x / v2.x; + r.y = v1.y / v2.y; + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + s = 1.0f / s; + + r.x = v1.x * s; + r.y = v1.y * s; + } + }; + + template<> + struct compute_vec2_inc + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& 
v1) + { + r.x = v1.x + 1; + r.y = v1.y + 1; + } + }; + + template<> + struct compute_vec2_dec + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1) + { + r.x = v1.x - 1; + r.y = v1.y - 1; + } + }; + + template<> + struct compute_vec2_eq + { + static FORCEINLINE bool map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC); + } + }; + + template<> + struct compute_vec2_ieq + { + static FORCEINLINE bool map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.y - v2.y) > P_FLT_INAC); + } + }; + + +} \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp index e42d65f..e961593 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp @@ -5,10 +5,10 @@ #include "Core/public/Math/SIMD/PhanesSIMDTypes.h" #include "Core/public/Math/Boilerplate.h" #include "Core/public/Math/MathCommon.hpp" -// -> For IntelliSense + +// Required includes #include "Core/public/Math/Vector3.hpp" - #include "Core/public/Math/Vector4.hpp" // ========== // @@ -110,19 +110,11 @@ namespace Phanes::Core::Math::Detail v1.comp = _mm_setr_ps(x, y, z, w); } - /*static constexpr void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) + static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) { v1.comp = _mm_set_ps(v2.x, v2.y, v3.x, v3.y); } - static constexpr void map(Phanes::Core::Math::TVector4& v1, const 
Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3) - { - v1.x = v2.x; - v1.y = v2.y; - v1.z = v3.x; - v1.w = v3.y; - }*/ - static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const float* s) { v1.comp = _mm_loadu_ps(s); @@ -228,9 +220,9 @@ namespace Phanes::Core::Math::Detail }; - //// ============ // - //// TVector3 // - //// ============ // + // ============ // + // TVector3 // + // ============ // template<> @@ -252,10 +244,10 @@ namespace Phanes::Core::Math::Detail v1.comp = _mm_setr_ps(x, y, z, 0.0f); } - /*static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, float s) + static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, float s) { - v1.comp = _mm_set_ps(v2.x, v2.y, v3.x, v3.y); - }*/ + v1.comp = _mm_set_ps(v2.x, v2.y, s, 0.0f); + } static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const float* s) { @@ -265,24 +257,169 @@ namespace Phanes::Core::Math::Detail }; - template<> - struct compute_vec3_inc + + template<> struct compute_vec3_eq : public compute_vec4_eq { - static FORCEINLINE void map(Phanes::Core::Math::TVector3& r, const Phanes::Core::Math::TVector3& v1) + static FORCEINLINE bool map(Phanes::Core::Math::TVector3& v1, Phanes::Core::Math::TVector3& v2) { - r.comp = _mm_add_ps(v1.comp, _mm_set_ps(1.0f, 1.0f, 1.0f, 0.0f)); + v1.comp = _mm_setr_ps(v1.x, v1.y, v1.z, 0.0f); + v2.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f); + + float r; + _mm_store_ps1(&r, _mm_cmpeq_ps(v1.comp, v2.comp)); + return (r == 0xffffffff) ? true : false; + } + }; + + template<> struct compute_vec3_ieq : public compute_vec4_ieq + { + static FORCEINLINE bool map(Phanes::Core::Math::TVector3& v1, Phanes::Core::Math::TVector3& v2) + { + v1.comp = _mm_setr_ps(v1.x, v1.y, v1.z, 0.0f); + v2.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f); + + float r; + _mm_store_ps1(&r, _mm_cmpneq_ps(v1.comp, v2.comp)); + return (r == 0xffffffff) ? 
true : false; + } + }; + + + template<> struct compute_vec3_add : public compute_vec4_add {}; + template<> struct compute_vec3_sub : public compute_vec4_sub {}; + template<> struct compute_vec3_mul : public compute_vec4_mul {}; + template<> struct compute_vec3_div : public compute_vec4_div {}; + template<> struct compute_vec3_inc : public compute_vec4_inc {}; + template<> struct compute_vec3_dec : public compute_vec4_dec {}; + + // ============ // + // TVector2 // + // ============ // + + + template<> + struct construct_vec2 + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, const TVector2& v2) + { + v1.comp = _mm_setr_pd(v2.x, v2.y); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double s) + { + v1.comp = _mm_set_pd1(s); + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double x, double y) + { + v1.comp = _mm_setr_pd(x, y); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, const double* s) + { + v1.comp = _mm_loadu_pd(s); + } }; template<> - struct compute_vec3_dec + struct compute_vec2_add { - static FORCEINLINE void map(Phanes::Core::Math::TVector3& r, const Phanes::Core::Math::TVector3& v1) + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) { - r.comp = _mm_sub_ps(v1.comp, _mm_set_ps(1.0f, 1.0f, 1.0f, 0.0f)); + r.comp = _mm_add_pd(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + r.comp = _mm_add_pd(v1.comp, _mm_set1_pd(s)); } }; - template<> struct compute_vec3_add : public compute_vec4_add {}; + template<> + struct compute_vec2_sub + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.comp = _mm_sub_pd(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const 
Phanes::Core::Math::TVector2& v1, double s) + { + r.comp = _mm_sub_pd(v1.comp, _mm_set1_pd(s)); + } + }; + + template<> + struct compute_vec2_mul + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.comp = _mm_mul_pd(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + r.comp = _mm_mul_pd(v1.comp, _mm_set1_pd(s)); + } + }; + + template<> + struct compute_vec2_div + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + r.comp = _mm_div_pd(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s) + { + r.comp = _mm_div_pd(v1.comp, _mm_set1_pd(s)); + } + }; + + template<> + struct compute_vec2_inc + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1) + { + r.comp = _mm_add_pd(v1.comp, _mm_set1_pd(1.0f)); + } + }; + + template<> + struct compute_vec2_dec + { + static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1) + { + r.comp = _mm_sub_pd(v1.comp, _mm_set1_pd(1.0f)); + } + }; + + template<> + struct compute_vec2_eq + { + static FORCEINLINE bool map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + double r; + _mm_store1_pd(&r, _mm_cmpeq_pd(v1.comp, v2.comp)); + return (r == 0xffffffff) ? true : false; + } + }; + + template<> + struct compute_vec2_ieq + { + static FORCEINLINE bool map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2) + { + double r; + _mm_store1_pd(&r, _mm_cmpneq_pd(v1.comp, v2.comp)); + return (r == 0xffffffff) ? 
true : false; + } + }; + + } \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h index a2b8b61..9a3833c 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h @@ -264,8 +264,7 @@ # error P_INTRINSICS must be defined by the user, when P_FORCE_INTRINSICS is used. # endif -#else - +#elif !defined(P_FORCE_FPU) # ifdef __AVX2__ # define P_AVX2__ 1 # elif defined(__AVX__) @@ -274,7 +273,6 @@ # define P_SSE__ 1 # endif - #endif // !P_FORCE_INTRINSICS #ifdef P_AVX2__ @@ -312,10 +310,10 @@ #if defined(P_FORCE_FPU) // Force, that no intrinsics may be used. # define P_INTRINSICS P_INTRINSICS_FPU -# undef P_AVX2__ -# undef P_AVX__ -# undef P_SSE__ -# undef P_SSE__ +# define P_AVX2__ 0 +# define P_AVX__ 0 +# define P_SSE__ 0 +# define P_SSE__ 0 #else # if (P_AVX__ == 1) && (P_AVX2__ == 0) # define P_INTRINSICS P_INTRINSICS_AVX diff --git a/Engine/Source/Runtime/Core/public/Math/Vector2.hpp b/Engine/Source/Runtime/Core/public/Math/Vector2.hpp index 99944df..b20a3d3 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector2.hpp +++ b/Engine/Source/Runtime/Core/public/Math/Vector2.hpp @@ -3,9 +3,10 @@ #include "Core/public/Math/Boilerplate.h" #include "Core/public/Math/MathCommon.hpp" -#include "Core/public/Math/MathAbstractTypes.h" #include "Core/public/Math/MathFwd.h" +#include "Core/public/Math/SIMD/Storage.h" + #ifndef P_DEBUG #pragma warning(disable : 4244) #endif @@ -29,7 +30,7 @@ namespace Phanes::Core::Math { */ - template + template struct TVector2 { public: @@ -67,7 +68,11 @@ namespace Phanes::Core::Math { * @note Components are split into x and y. Access and manipulation is possible by these variables. 
*/ - Real* comp; + union + { + typename SIMD::Storage<2, T, SIMD::use_simd::value>::type comp; + typename SIMD::Storage<2, T, SIMD::use_simd::value>::type data; + }; }; @@ -86,27 +91,7 @@ namespace Phanes::Core::Math { * Copy constructor */ - TVector2(const TVector2& v) - { - memcpy(this->comp, comp, sizeof(T) * 2); - } - - /** - * Move constructor - */ - - TVector2(TVector2&& v) - { - this->comp = v.comp; - v.comp = nullptr; - } - - /** - * Convert other type of vector - */ - - template - explicit TVector2(const TVector2& v) : x((T)v.x), y((T)v.y) {}; + TVector2(const TVector2& v); /** * Construct Vector from xy components. @@ -115,33 +100,19 @@ namespace Phanes::Core::Math { * @param(y) Y component */ - TVector2(const Real x, const Real y) : x(x), y(y) {}; + TVector2(const Real x, const Real y); - /** - * Construct Vector from two component array. - * - * @param(comp) Array of components - */ - - explicit TVector2(const Real* comp) - { - memcpy(this->comp, comp, sizeof(T) * 2); - } - - - /** - * Constructs a vector pointing from start to end. - * - * @param(start) Startingpoint - * @param(end) Endpoint - */ - - TVector2(const TPoint2& start, const TPoint2& end) - { - this->x = end.x - start.x; - this->y = end.y - start.y; - } + /// + /// Construct vector from array. + /// + /// Array of at least 2 items. + TVector2(const Real* comp); + /// + /// Construct vector by broadcasting one scalar into all components. + /// + /// Scalar + TVector2(Real s); }; // ====================== // @@ -149,14 +120,14 @@ namespace Phanes::Core::Math { // ====================== // /** - * Addition operation on same TVector2 (this) by a floating point value. + * Addition operation on same TVector2 (this) by a floating point value. 
* * @param(v1) Vector to add to * @param(s) Floating point to add */ template - TVector2 operator+= (TVector2& v1, T s) + TVector2 operator+= (TVector2& v1, T s) { v1.x += s; v1.y += s; @@ -165,14 +136,14 @@ namespace Phanes::Core::Math { } /** - * Addition operation on same TVector2 (this) by a another TVector2. + * Addition operation on same TVector2 (this) by a another TVector2. * * @param(v1) Vector to add to * @param(v2) Vector to add */ template - TVector2 operator+= (TVector2& v1, const TVector2& v2) + TVector2 operator+= (TVector2& v1, const TVector2& v2) { v1.x += v2.x; v1.y += v2.y; @@ -181,14 +152,14 @@ namespace Phanes::Core::Math { } /** - * Substraction operation on same TVector2 (this) by a floating point. + * Substraction operation on same TVector2 (this) by a floating point. * * @param(v1) Vector to substract from * @param(v2) Floating point to substract */ template - TVector2 operator-= (TVector2& v1, T s) + TVector2 operator-= (TVector2& v1, T s) { v1.x -= s; v1.y -= s; @@ -197,14 +168,14 @@ namespace Phanes::Core::Math { } /** - * Substraction operation on same TVector2 (this) by a another TVector2. + * Substraction operation on same TVector2 (this) by a another TVector2. * * @param(v1) Vector to substract from * @param(v2) Vector to substract */ template - TVector2 operator-= (TVector2& v1, const TVector2& v2) + TVector2 operator-= (TVector2& v1, const TVector2& v2) { v1.x -= v2.x; v1.y -= v2.y; @@ -213,14 +184,14 @@ namespace Phanes::Core::Math { } /** - * Multiplication of TVector2 (this) with a floating point. + * Multiplication of TVector2 (this) with a floating point. 
* * @param(v1) Vector to multiply with * @param(s Floating point to multiply with */ template - TVector2 operator*= (TVector2& v1, T s) + TVector2 operator*= (TVector2& v1, T s) { v1.x *= s; v1.y *= s; @@ -236,7 +207,7 @@ namespace Phanes::Core::Math { */ template - TVector2 operator/= (TVector2& v1, T s) + TVector2 operator/= (TVector2& v1, T s) { s = 1.0f / s; v1.x *= s; @@ -246,7 +217,7 @@ namespace Phanes::Core::Math { } /** - * Scale of Vector by floating point. (> Creates a new TVector2) + * Scale of Vector by floating point. (> Creates a new TVector2) * * @param(v1) Vector to multiply with * @param(s Floating point to multiply with @@ -255,13 +226,13 @@ namespace Phanes::Core::Math { */ template - TVector2 operator* (const TVector2& v1, T s) + TVector2 operator* (const TVector2& v1, T s) { - return TVector2(v1.x * s, v1.y * s); + return TVector2(v1.x * s, v1.y * s); } /** - * Division of Vector by floating point. (> Creates another TVector2) + * Division of Vector by floating point. (> Creates another TVector2) * * @param(v1) Vector to multiply with * @param(s Floating point to divide with @@ -270,14 +241,14 @@ namespace Phanes::Core::Math { */ template - TVector2 operator/ (const TVector2& v1, T s) + TVector2 operator/ (const TVector2& v1, T s) { s = 1.0f / s; - return TVector2(v1.x * s, v1.y * s); + return TVector2(v1.x * s, v1.y * s); } /** - * Scale of Vector by floating point. (> Creates a new TVector2) + * Scale of Vector by floating point. 
(> Creates a new TVector2) * * @param(v1) Vector to multiply with * @param(s Floating point to multiply with @@ -286,7 +257,7 @@ namespace Phanes::Core::Math { */ template - inline TVector2 operator* (T s, const TVector2& v1) + inline TVector2 operator* (T s, const TVector2& v1) { return v1 * s; } @@ -301,7 +272,7 @@ namespace Phanes::Core::Math { */ template - inline TVector2 operator/ (T s, const TVector2& v1) + inline TVector2 operator/ (T s, const TVector2& v1) { s = 1.0f / s; return v1 * s; @@ -319,7 +290,7 @@ namespace Phanes::Core::Math { */ template - inline T operator* (const TVector2& v1, const TVector2& v2) + inline T operator* (const TVector2& v1, const TVector2& v2) { return v1.x * v2.x + v1.y * v2.y; } @@ -334,9 +305,9 @@ namespace Phanes::Core::Math { */ template - TVector2 operator+ (const TVector2& v1, T s) + TVector2 operator+ (const TVector2& v1, T s) { - return TVector2(v1.x + s, v1.y + s); + return TVector2(v1.x + s, v1.y + s); } /** @@ -349,9 +320,9 @@ namespace Phanes::Core::Math { */ template - TVector2 operator+ (const TVector2& v1, const TVector2& v2) + TVector2 operator+ (const TVector2& v1, const TVector2& v2) { - return TVector2(v1.x + v2.x, v1.y + v2.y); + return TVector2(v1.x + v2.x, v1.y + v2.y); } /** @@ -364,9 +335,9 @@ namespace Phanes::Core::Math { */ template - TVector2 operator- (const TVector2& v1, T s) + TVector2 operator- (const TVector2& v1, T s) { - return TVector2(v1.x - s, v1.y - s); + return TVector2(v1.x - s, v1.y - s); } /** @@ -379,9 +350,9 @@ namespace Phanes::Core::Math { */ template - TVector2 operator- (const TVector2& v1, const TVector2& v2) + TVector2 operator- (const TVector2& v1, const TVector2& v2) { - return TVector2(v1.x - v2.x, v1.y - v2.y); + return TVector2(v1.x - v2.x, v1.y - v2.y); } /** @@ -391,9 +362,9 @@ namespace Phanes::Core::Math { */ template - TVector2 operator- (const TVector2& v1) + TVector2 operator- (const TVector2& v1) { - return TVector2&(-v1.x, -v1.y); + return TVector2&(-v1.x, -v1.y); 
} @@ -408,7 +379,7 @@ namespace Phanes::Core::Math { */ template - bool operator== (const TVector2& v1, const TVector2& v2) + bool operator== (const TVector2& v1, const TVector2& v2) { return (abs(v1.x - v1.x) < P_FLT_INAC && abs(v1.y - v1.y) < P_FLT_INAC); } @@ -425,7 +396,7 @@ namespace Phanes::Core::Math { */ template - bool operator!= (const TVector2& v1, const TVector2& v2) + bool operator!= (const TVector2& v1, const TVector2& v2) { return (abs(v1.x - v1.x) > P_FLT_INAC || abs(v1.y - v1.y) > P_FLT_INAC); } @@ -444,7 +415,7 @@ namespace Phanes::Core::Math { */ template - T Magnitude(const TVector2& v1) + T Magnitude(const TVector2& v1) { return sqrtf(v1.x * v1.x + v1.y * v1.y); } @@ -453,7 +424,7 @@ namespace Phanes::Core::Math { * @see [FUNC]Magnitude */ template - FORCEINLINE T Length(const TVector2& v1) { return Magnitude(v1); }; + FORCEINLINE T Length(const TVector2& v1) { return Magnitude(v1); }; /** * Square of magnitude of Vector @@ -464,7 +435,7 @@ namespace Phanes::Core::Math { */ template - T SqrMagnitude(const TVector2& v1) + T SqrMagnitude(const TVector2& v1) { return v1.x * v1.x + v1.y * v1.y; } @@ -473,7 +444,7 @@ namespace Phanes::Core::Math { * @see [FUNC]SqrMagnitude */ template - FORCEINLINE T SqrLength(const TVector2& v1) { return SqrMagnitude(v1); }; + FORCEINLINE T SqrLength(const TVector2& v1) { return SqrMagnitude(v1); }; /** * Normalize Vector @@ -482,7 +453,7 @@ namespace Phanes::Core::Math { */ template - TVector2 NormalizeV(TVector2& v1) + TVector2 NormalizeV(TVector2& v1) { float vecNorm = Magnitude(v1); v1 /= (vecNorm < P_FLT_INAC) ? 
1 : vecNorm; @@ -498,7 +469,7 @@ namespace Phanes::Core::Math { */ template - TVector2 UnsafeNormalizeV(TVector2& v1) + TVector2 UnsafeNormalizeV(TVector2& v1) { v1 /= Magnitude(v1); @@ -513,7 +484,7 @@ namespace Phanes::Core::Math { */ template - T Angle(const TVector2& v1, const TVector2& v2) + T Angle(const TVector2& v1, const TVector2& v2) { return acos((v1 * v2) / Magnitude(v1) * Magnitude(v2)); } @@ -526,7 +497,7 @@ namespace Phanes::Core::Math { */ template - T CosineAngle(const TVector2& v1, const TVector2& v2) + T CosineAngle(const TVector2& v1, const TVector2& v2) { return (v1 * v2) / Magnitude(v1) * Magnitude(v2); } @@ -538,7 +509,7 @@ namespace Phanes::Core::Math { */ template - TVector2 SignVectorV(TVector2& v1) + TVector2 SignVectorV(TVector2& v1) { v1.x = (v1.x >= 0) ? 1 : -1; v1.y = (v1.y >= 0) ? 1 : -1; @@ -554,7 +525,7 @@ namespace Phanes::Core::Math { */ template - TVector2 BindToSquareV(TVector2& v1, T radius) + TVector2 BindToSquareV(TVector2& v1, T radius) { float k = (abs(v1.x) > abs(v1.y)) ? abs(radius / v1.x) : abs(radius / v1.y); v1 *= k; @@ -570,7 +541,7 @@ namespace Phanes::Core::Math { */ template - TVector2 ClampToSquareV(TVector2& v1, T radius) + TVector2 ClampToSquareV(TVector2& v1, T radius) { float prime = (abs(v1.x) > abs(v1.y)) ? v1.x : v1.y; float k = (prime > radius) ? 
abs(radius / prime) : 1.0f; @@ -587,7 +558,7 @@ namespace Phanes::Core::Math { */ template - inline T DotP(const TVector2& v1, const TVector2& v2) + inline T DotP(const TVector2& v1, const TVector2& v2) { return v1.x * v2.x + v1.y * v2.y; } @@ -602,7 +573,7 @@ namespace Phanes::Core::Math { */ template - TVector2 MaxV(TVector2& v1, const TVector2& v2) + TVector2 MaxV(TVector2& v1, const TVector2& v2) { v1.x = Phanes::Core::Math::Max(v1.x, v2.x); v1.y = Phanes::Core::Math::Max(v1.y, v2.y); @@ -620,7 +591,7 @@ namespace Phanes::Core::Math { */ template - TVector2 MinV(TVector2& v1, const TVector2& v2) + TVector2 MinV(TVector2& v1, const TVector2& v2) { v1.x = Phanes::Core::Math::Min(v1.x, v2.x); v1.y = Phanes::Core::Math::Min(v1.y, v2.y); @@ -637,7 +608,7 @@ namespace Phanes::Core::Math { */ template - TVector2 GetPerpendicularV(TVector2& v1) + TVector2 GetPerpendicularV(TVector2& v1) { T x = v1.x; v1.x = v1.y; @@ -657,7 +628,7 @@ namespace Phanes::Core::Math { */ template - TVector2 GetReversePerpendicularV(TVector2& v1) + TVector2 GetReversePerpendicularV(TVector2& v1) { T x = v1.x; v1.x = -v1.y; @@ -676,7 +647,7 @@ namespace Phanes::Core::Math { */ template - TVector2 ScaleV(TVector2& v1, const TVector2& v2) + TVector2 ScaleV(TVector2& v1, const TVector2& v2) { v1.x *= v2.x; v1.y *= v2.y; @@ -693,7 +664,7 @@ namespace Phanes::Core::Math { */ template - TVector2 CompInverseV(TVector2& v1) + TVector2 CompInverseV(TVector2& v1) { v1.x = 1.0f / v1.x; v1.y = 1.0f / v1.y; @@ -711,7 +682,7 @@ namespace Phanes::Core::Math { */ template - TVector2 ReflectV(TVector2& v1, const TVector2& normal) + TVector2 ReflectV(TVector2& v1, const TVector2& normal) { Set(v1, v1 - (2 * (v1 * normal) * normal)); @@ -726,7 +697,7 @@ namespace Phanes::Core::Math { */ template - TVector2 Set(TVector2& v1, const TVector2& v2) + TVector2 Set(TVector2& v1, const TVector2& v2) { v1 = v2; @@ -741,7 +712,7 @@ namespace Phanes::Core::Math { */ template - TVector2 Set(TVector2& v1, T x, T y) + 
TVector2 Set(TVector2& v1, T x, T y) { v1.x = x; v1.y = y; @@ -758,7 +729,7 @@ namespace Phanes::Core::Math { */ template - TVector2 RotateV(TVector2& v1, T angle) + TVector2 RotateV(TVector2& v1, T angle) { float sinAngle = sin(angle); float cosAngle = cos(angle); @@ -781,7 +752,7 @@ namespace Phanes::Core::Math { */ template - FORCEINLINE TVector2 ClockwiseRotateV(TVector2& v1, T angle) + FORCEINLINE TVector2 ClockwiseRotateV(TVector2& v1, T angle) { RotateV(v1, -angle); @@ -795,7 +766,7 @@ namespace Phanes::Core::Math { */ template - TVector2 NegateV(TVector2& v1) + TVector2 NegateV(TVector2& v1) { v1.x = -v1.x; v1.y = -v1.y; @@ -811,7 +782,7 @@ namespace Phanes::Core::Math { */ template - inline bool IsNormalized(const TVector2& v1, T threshold = P_FLT_INAC) + inline bool IsNormalized(const TVector2& v1, T threshold = P_FLT_INAC) { return (SqrMagnitude(v1) < threshold); } @@ -829,7 +800,7 @@ namespace Phanes::Core::Math { */ template - inline bool IsPerpendicular(const TVector2& v1, const TVector2& v2, T threshold = P_FLT_INAC) + inline bool IsPerpendicular(const TVector2& v1, const TVector2& v2, T threshold = P_FLT_INAC) { return (abs(DotP(v1, v2)) < threshold); } @@ -847,7 +818,7 @@ namespace Phanes::Core::Math { */ template - inline bool IsParallel(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) + inline bool IsParallel(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) { return (abs(DotP(v1, v2)) > threshold); } @@ -865,7 +836,7 @@ namespace Phanes::Core::Math { */ template - inline bool IsCoincident(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) + inline bool IsCoincident(const TVector2& v1, const TVector2& v2, T threshold = 1.0f - P_FLT_INAC) { return (DotP(v1, v2) > threshold); } @@ -880,7 +851,7 @@ namespace Phanes::Core::Math { */ // - //Matrix2 OuterProduct(const TVector2& v1, const TVector2& v2); + //Matrix2 OuterProduct(const TVector2& v1, const TVector2& v2); // 
============================================================== // @@ -898,9 +869,9 @@ namespace Phanes::Core::Math { */ template - TVector2 Reflect(const TVector2& v1, const TVector2& normal) + TVector2 Reflect(const TVector2& v1, const TVector2& normal) { - return TVector2(v1 - (2 * (v1 * normal) * normal)); + return TVector2(v1 - (2 * (v1 * normal) * normal)); } /** @@ -913,9 +884,9 @@ namespace Phanes::Core::Math { */ template - TVector2 Scale(const TVector2& v1, const TVector2& v2) + TVector2 Scale(const TVector2& v1, const TVector2& v2) { - return TVector2(v1.x * v2.x, v1.y * v2.y); + return TVector2(v1.x * v2.x, v1.y * v2.y); } /** @@ -927,9 +898,9 @@ namespace Phanes::Core::Math { */ template - TVector2 CompInverse(const TVector2& v1) + TVector2 CompInverse(const TVector2& v1) { - return TVector2(1.0f / v1.x, 1.0f / v1.y); + return TVector2(1.0f / v1.x, 1.0f / v1.y); } /** @@ -941,9 +912,9 @@ namespace Phanes::Core::Math { */ template - TVector2 Negate(const TVector2& v1) + TVector2 Negate(const TVector2& v1) { - return TVector2(-v1.x, -v1.y); + return TVector2(-v1.x, -v1.y); } /** @@ -955,9 +926,9 @@ namespace Phanes::Core::Math { */ template - TVector2 GetPerpendicular(const TVector2& v1) + TVector2 GetPerpendicular(const TVector2& v1) { - return TVector2(v1.y, -v1.x); + return TVector2(v1.y, -v1.x); } /** @@ -969,9 +940,9 @@ namespace Phanes::Core::Math { */ template - TVector2 GetReversePerpendicular(const TVector2& v1) + TVector2 GetReversePerpendicular(const TVector2& v1) { - return TVector2(-v1.y, v1.x); + return TVector2(-v1.y, v1.x); } /** @@ -984,9 +955,9 @@ namespace Phanes::Core::Math { */ template - TVector2 Min(const TVector2& v1, const TVector2& v2) + TVector2 Min(const TVector2& v1, const TVector2& v2) { - return TVector2(Phanes::Core::Math::Min(v1.x, v2.x), Phanes::Core::Math::Min(v1.y, v2.y)); + return TVector2(Phanes::Core::Math::Min(v1.x, v2.x), Phanes::Core::Math::Min(v1.y, v2.y)); } /** @@ -999,9 +970,9 @@ namespace Phanes::Core::Math { 
*/ template - TVector2 Max(const TVector2& v1, const TVector2& v2) + TVector2 Max(const TVector2& v1, const TVector2& v2) { - return TVector2(Phanes::Core::Math::Max(v1.x, v2.x), Phanes::Core::Math::Max(v1.y, v2.y)); + return TVector2(Phanes::Core::Math::Max(v1.x, v2.x), Phanes::Core::Math::Max(v1.y, v2.y)); } /** @@ -1013,7 +984,7 @@ namespace Phanes::Core::Math { */ template - TVector2 Normalize(const TVector2& v1) + TVector2 Normalize(const TVector2& v1) { float vecNorm = Magnitude(v1); return (vecNorm < P_FLT_INAC) ? PZeroVector2(T) : (v1 / vecNorm); @@ -1029,7 +1000,7 @@ namespace Phanes::Core::Math { */ template - TVector2 UnsafeNormalize(const TVector2& v1) + TVector2 UnsafeNormalize(const TVector2& v1) { return (v1 / Magnitude(v1)); } @@ -1043,9 +1014,9 @@ namespace Phanes::Core::Math { */ template - TVector2 SignVector(const TVector2& v1) + TVector2 SignVector(const TVector2& v1) { - return TVector2((v1.x >= 0) ? 1 : -1, (v1.y >= 0) ? 1 : -1); + return TVector2((v1.x >= 0) ? 1 : -1, (v1.y >= 0) ? 1 : -1); } /** @@ -1058,7 +1029,7 @@ namespace Phanes::Core::Math { */ template - TVector2 BindToSquare(const TVector2& v1, T radius) + TVector2 BindToSquare(const TVector2& v1, T radius) { float k = (abs(v1.x) > abs(v1.y)) ? abs(radius / v1.x) : abs(radius / v1.y); return v1 * k; @@ -1074,7 +1045,7 @@ namespace Phanes::Core::Math { */ template - TVector2 ClampToSquare(const TVector2& v1, T radius) + TVector2 ClampToSquare(const TVector2& v1, T radius) { float prime = (abs(v1.x) > abs(v1.y)) ? v1.x : v1.y; float k = (prime > radius) ? 
abs(radius / prime) : 1.0f; @@ -1095,7 +1066,7 @@ namespace Phanes::Core::Math { */ template - TVector2 Lerp(const TVector2& startVec, const TVector2& destVec, T t) + TVector2 Lerp(const TVector2& startVec, const TVector2& destVec, T t) { t = Phanes::Core::Math::Clamp(t, (T)0.0, (T)1.0); @@ -1115,7 +1086,7 @@ namespace Phanes::Core::Math { */ template - TVector2 LerpUnclamped(const TVector2& startVec, const TVector2& destVec, T t) + TVector2 LerpUnclamped(const TVector2& startVec, const TVector2& destVec, T t) { return (t * destVec) + ((1 - t) * startVec); } @@ -1132,12 +1103,12 @@ namespace Phanes::Core::Math { */ template - TVector2 Rotate(const TVector2& v1, T angle) + TVector2 Rotate(const TVector2& v1, T angle) { float sinAngle = sin(angle); float cosAngle = cos(angle); - return TVector2(v1.x * cosAngle - v1.y * sinAngle, + return TVector2(v1.x * cosAngle - v1.y * sinAngle, v1.y * cosAngle + v1.x * sinAngle); } @@ -1152,11 +1123,15 @@ namespace Phanes::Core::Math { */ template - TVector2 ClockwiseRotate(const TVector2& v1, T angle) + TVector2 ClockwiseRotate(const TVector2& v1, T angle) { return Rotate(v1, -angle); } } // phanes::core::math::coretypes -#endif // !VECTOR2_H \ No newline at end of file +#endif // !VECTOR2_H + + + +#include "Core/public/Math/Vector2.inl" \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/Vector2.inl b/Engine/Source/Runtime/Core/public/Math/Vector2.inl new file mode 100644 index 0000000..afe1db0 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/Vector2.inl @@ -0,0 +1,205 @@ +#pragma once + +#include "Core/public/Math/Boilerplate.h" + +#include "Core/public/Math/Detail/Vector2Decl.inl" +#include "Core/public/Math/SIMD/SIMDIntrinsics.h" + +#include "Core/public/Math/SIMD/PhanesSIMDTypes.h" + + + +namespace Phanes::Core::Math +{ + template + TVector2::TVector2(const TVector2& v) + { + Detail::construct_vec2::value>::map(*this, v); + } + + template + TVector2::TVector2(Real _x, Real _y) + { + 
Detail::construct_vec2::value>::map(*this, _x, _y); + } + + template + TVector2::TVector2(Real s) + { + Detail::construct_vec2::value>::map(*this, s); + } + + template + TVector2::TVector2(const Real* comp) + { + Detail::construct_vec2::value>::map(*this, comp); + } + + + + + template + TVector2 operator+=(TVector2& v1, const TVector2& v2) + { + Detail::compute_vec2_add::value>::map(v1, v1, v2); + return v1; + } + + template + TVector2 operator+=(TVector2& v1, T s) + { + Detail::compute_vec2_add::value>::map(v1, v1, s); + return v1; + } + + template + TVector2 operator-=(TVector2& v1, const TVector2& v2) + { + Detail::compute_vec2_sub::value>::map(v1, v1, v2); + return v1; + } + + template + TVector2 operator-=(TVector2& v1, T s) + { + Detail::compute_vec2_sub::value>::map(v1, v1, s); + return v1; + } + + template + TVector2 operator*=(TVector2& v1, const TVector2& v2) + { + Detail::compute_vec2_mul::value>::map(v1, v1, v2); + return v1; + } + + template + TVector2 operator*=(TVector2& v1, T s) + { + Detail::compute_vec2_mul::value>::map(v1, v1, s); + return v1; + } + + template + TVector2 operator/=(TVector2& v1, const TVector2& v2) + { + Detail::compute_vec2_div::value>::map(v1, v1, v2); + return v1; + } + + template + TVector2 operator/=(TVector2& v1, T s) + { + Detail::compute_vec2_div::value>::map(v1, v1, s); + return v1; + } + + template + TVector2 operator+(TVector2& v1, const TVector2& v2) + { + TVector2 r; + Detail::compute_vec2_add::value>::map(r, v1, v2); + return r; + } + + template + TVector2 operator+(TVector2& v1, T s) + { + TVector2 r; + Detail::compute_vec2_add::value>::map(r, v1, s); + return r; + } + + template + TVector2 operator-(TVector2& v1, const TVector2& v2) + { + TVector2 r; + Detail::compute_vec2_sub::value>::map(r, v1, v2); + return r; + } + + template + TVector2 operator-(TVector2& v1, T s) + { + TVector2 r; + Detail::compute_vec2_sub::value>::map(r, v1, s); + return r; + } + + template + TVector2 operator*(TVector2& v1, const 
TVector2& v2) + { + TVector2 r; + Detail::compute_vec2_mul::value>::map(r, v1, v2); + return r; + } + + template + TVector2 operator*(TVector2& v1, T s) + { + TVector2 r; + Detail::compute_vec2_mul::value>::map(r, v1, s); + return r; + } + + template + TVector2 operator/(TVector2& v1, const TVector2& v2) + { + TVector2 r; + Detail::compute_vec2_div::value>::map(r, v1, v2); + return r; + } + + template + TVector2 operator/(TVector2& v1, T s) + { + TVector2 r; + Detail::compute_vec2_div::value>::map(r, v1, s); + return r; + } + + // Comparison + + template + bool operator==(const TVector2& v1, const TVector2& v2) + { + return Detail::compute_vec2_eq::value>::map(v1, v2); + } + + template + bool operator!=(const TVector2& v1, const TVector2& v2) + { + return Detail::compute_vec2_ieq::value>::map(v1, v2); + } + + + + // Inc- / Decrement + + + template + TVector2& operator++(TVector2& v1) + { + Detail::compute_vec2_inc::value>::map(v1); + return v1; + } + + template + TVector2& operator--(TVector2& v1) + { + Detail::compute_vec2_dec::value>::map(v1); + return v1; + } + + template + TVector2& operator++(TVector2& v1, int) + { + return ++v1; + } + + template + TVector2& operator--(TVector2& v1, int) + { + return --v1; + } +} \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/Vector3.hpp b/Engine/Source/Runtime/Core/public/Math/Vector3.hpp index 4e4a3a5..9f92ad1 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector3.hpp +++ b/Engine/Source/Runtime/Core/public/Math/Vector3.hpp @@ -34,40 +34,6 @@ namespace Phanes::Core::Math { template struct TVector3 : public TVector4 { public: - //using Real = T; - //union - //{ - // struct { - // /// - // /// X component of vector - // /// - // Real x; - - // /// - // /// X component of vector - // /// - // Real y; - - // /// - // /// Z component of vector - // /// - // Real z; - - // /// - // /// W component of vector - // /// - // Real w; - - // }; - // /// - // /// Wraps components in one array / xmm 
register. - // /// - // union - // { - // typename SIMD::Storage<4, Real, SIMD::use_simd::value>::type comp; - // typename SIMD::Storage<4, Real, SIMD::use_simd::value>::type data; - // }; - //}; using Real = T; @@ -107,7 +73,7 @@ namespace Phanes::Core::Math { /// /// Vector /// Scalar - TVector3(const TVector2& v, Real s); + TVector3(const TVector2& v, Real s); }; diff --git a/Engine/Source/Runtime/Core/public/Math/Vector3.inl b/Engine/Source/Runtime/Core/public/Math/Vector3.inl index 4e87bdc..3d82ee2 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector3.inl +++ b/Engine/Source/Runtime/Core/public/Math/Vector3.inl @@ -30,9 +30,9 @@ namespace Phanes::Core::Math } template - TVector3::TVector3(const TVector2& v1, Real s) + TVector3::TVector3(const TVector2& v1, Real s) { - Detail::construct_vec3::value>::map(*this, v1.x, v1.y, s); + Detail::construct_vec3::value>::map(*this, v1, s); } template diff --git a/Engine/Source/Runtime/Core/public/Math/Vector4.hpp b/Engine/Source/Runtime/Core/public/Math/Vector4.hpp index abad58d..c1f1fa2 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector4.hpp +++ b/Engine/Source/Runtime/Core/public/Math/Vector4.hpp @@ -84,7 +84,7 @@ namespace Phanes::Core::Math /// /// TVector2 one /// TVector2 two - TVector4(const TVector2& v1, const TVector2& v2); + TVector4(const TVector2& v1, const TVector2& v2); /// /// Construct vector from array of components diff --git a/Engine/Source/Runtime/Core/public/Math/Vector4.inl b/Engine/Source/Runtime/Core/public/Math/Vector4.inl index 0ff1b1c..f39d685 100644 --- a/Engine/Source/Runtime/Core/public/Math/Vector4.inl +++ b/Engine/Source/Runtime/Core/public/Math/Vector4.inl @@ -29,7 +29,7 @@ namespace Phanes::Core::Math } template - Phanes::Core::Math::TVector4::TVector4(const TVector2& v1, const TVector2& v2) + Phanes::Core::Math::TVector4::TVector4(const TVector2& v1, const TVector2& v2) { Detail::construct_vec4::value>::map(*this, v1, v2); }