# Patch summary (free text ahead of the first "diff --git" header is skipped by git apply / patch).
#
# What the patch does:
#   * PhanesSIMDTypes.h       - adds an include guard (#pragma once).
#   * PhanesVectorMathSSE.hpp - adds p_vec4_abs plus SSE specializations for sub / mul / div / eq / ieq.
#   * Platform.h              - replaces the unconditional "P_* 0" defines with #ifndef fallbacks placed after detection.
#   * Vector4.hpp             - drops the TPoint4 constructor declaration; only Vector4.inl is included at the end.
#   * Vector4.inl             - adds scalar / TVector2 / pointer constructors, ++ / --, and SIMD constructor specializations.
#
# Review fixes folded into this revision (hunk line counts are unchanged):
#   * p_vec4_abs is now inline; a non-inline definition in a header breaks linking once two TUs include it.
#   * compute_vec4_eq / compute_vec4_ieq no longer store a 128-bit register into a single float
#     (_mm_store_ps1 writes 16 bytes) and no longer compare a float against 0xffffffff; they use
#     _mm_movemask_ps over all four lanes instead.
#
# Caveats: the text this was recovered from had lost intra-line whitespace, blank lines and everything inside
# angle brackets (template arguments, <...> include targets). Angle-bracket text is left as found; blank
# context lines were placed to satisfy the hunk counts. The post-image blob ids on the "index" lines of
# PhanesVectorMathSSE.hpp no longer match because of the review fixes.
diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h
index b614e0c..6ff9116 100644
--- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h
+++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h
@@ -1,3 +1,5 @@
+#pragma once
+
 // This file includes the necessary header for vectorization intrinsics. If no specifics are defined SSE4.2 is used.
 //
 // ARM is not supported.
diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
index 5901cb3..f0815c7 100644
--- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
@@ -1,10 +1,30 @@
 #pragma once
 
-#include "Core/public/Math/Boilerplate.h"
 #include
+#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
+#include "Core/public/Math/Boilerplate.h"
+#include "Core/public/Math/MathCommon.hpp"
+
+#include
+
 
 // -> For IntelliSense
+#include "Core/public/Math/Vector4.hpp"
+
+// ========== //
+//   Common   //
+// ========== //
+
+// Component-wise absolute value: masks off the sign bit of each 32-bit lane.
+inline Phanes::Core::Types::Vec4f32Reg p_vec4_abs(const Phanes::Core::Types::Vec4f32Reg& v)
+{
+    return _mm_and_ps(v, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
+}
+
+
+
+
 
 // ============ //
 //   TVector4   //
 // ============ //
@@ -22,4 +42,54 @@ namespace Phanes::Core::Math::Detail
             r.comp = _mm_add_ps(v1.comp, v2.comp);
         }
     };
-}
\ No newline at end of file
+
+    template<>
+    struct compute_vec4_sub
+    {
+        static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+        {
+            r.comp = _mm_sub_ps(v1.comp, v2.comp);
+        }
+    };
+
+    template<>
+    struct compute_vec4_mul
+    {
+        static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+        {
+            r.comp = _mm_mul_ps(v1.comp, v2.comp);
+        }
+    };
+
+    template<>
+    struct compute_vec4_div
+    {
+        static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+        {
+            r.comp = _mm_div_ps(v1.comp, v2.comp);
+        }
+    };
+
+    template<>
+    struct compute_vec4_eq
+    {
+        static FORCEINLINE bool map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+        {
+            // Equal only if all four lanes match; movemask gathers one bit per lane.
+            const int laneMask = _mm_movemask_ps(_mm_cmpeq_ps(v1.comp, v2.comp));
+            return laneMask == 0xF;
+        }
+    };
+
+    template<>
+    struct compute_vec4_ieq
+    {
+        static FORCEINLINE bool map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+        {
+            // Unequal as soon as any single lane differs.
+            const int laneMask = _mm_movemask_ps(_mm_cmpneq_ps(v1.comp, v2.comp));
+            return laneMask != 0;
+        }
+    };
+}
+
diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h
index c15b890..a2b8b61 100644
--- a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h
+++ b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h
@@ -254,11 +254,6 @@
 
 // Define also supported instruction sets for Visual Studio, as it only defines the latest (e.g. only __AVX__ not __SSE4__ ...).
 
-#define P_AVX2__ 0
-#define P_AVX__ 0
-#define P_SSE__ 0
-#define P_NEON__ 0
-
 #ifdef P_FORCE_INTRINSICS
 
 # undef __AVX2__
@@ -272,7 +267,7 @@
 
 #else
 # ifdef __AVX2__
-# define P_AVX2__ 1
+# define P_AVX2__ 1
 # elif defined(__AVX__)
 # define P_AVX__ 1
 # elif defined(__SSE__)
@@ -290,6 +285,23 @@
 # define P_SSE__ 1
 #endif
 
+// Deactivate unset SIMD
+#ifndef P_AVX2__
+# define P_AVX2__ 0
+#endif
+
+// Deactivate unset SIMD
+#ifndef P_AVX__
+# define P_AVX__ 0
+#endif
+
+#ifndef P_SSE__
+# define P_SSE__ 0
+#endif
+
+#ifndef P_NEON__
+# define P_NEON__ 0
+#endif
 
 #define P_INTRINSICS_FPU 0
 #define P_INTRINSICS_SSE 1
diff --git a/Engine/Source/Runtime/Core/public/Math/Vector4.hpp b/Engine/Source/Runtime/Core/public/Math/Vector4.hpp
index 63bcd2f..81f771d 100644
--- a/Engine/Source/Runtime/Core/public/Math/Vector4.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/Vector4.hpp
@@ -90,13 +90,6 @@ namespace Phanes::Core::Math
         /// Array of at least 4 components
         TVector4(const Real* comp);
 
-        ///
-        /// Construct the vector, by calculating the way between two points.
-        ///
-        /// Starting point of the vector.
-        /// End point of the vector.
-        TVector4(const TPoint4& start, const TPoint4& end);
-
     };
 
     // ===================== //
@@ -698,8 +691,5 @@ namespace Phanes::Core::Math
     TVector4 PrespectiveDivideV(TVector4& v1);
 }
 
-// No SIMD
-#include "Core/public/Math/Vector4.inl"
 
-// SIMD
-#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
\ No newline at end of file
+#include "Core/public/Math/Vector4.inl"
diff --git a/Engine/Source/Runtime/Core/public/Math/Vector4.inl b/Engine/Source/Runtime/Core/public/Math/Vector4.inl
index 4f2cfcc..f419752 100644
--- a/Engine/Source/Runtime/Core/public/Math/Vector4.inl
+++ b/Engine/Source/Runtime/Core/public/Math/Vector4.inl
@@ -3,8 +3,13 @@
 #include "Core/public/Math/Boilerplate.h"
 #include "Core/public/Math/Detail/Vector4Decl.inl"
 
+#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
+
 #include "Core/public/Math/Vector4.hpp"
 
+
+#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
+
 #include
 
 namespace Phanes::Core::Math
@@ -25,6 +30,30 @@ namespace Phanes::Core::Math
         w(_w)
     {}
 
+    template
+    Phanes::Core::Math::TVector4::TVector4(Real s) :
+        x(s),
+        y(s),
+        z(s),
+        w(s)
+    {}
+
+    template
+    Phanes::Core::Math::TVector4::TVector4(const TVector2& v1, const TVector2& v2) :
+        x(v1.x),
+        y(v1.y),
+        z(v2.x),
+        w(v2.y)
+    {}
+
+    template
+    Phanes::Core::Math::TVector4::TVector4(const Real* comp) :
+        x(comp[0]),
+        y(comp[1]),
+        z(comp[2]),
+        w(comp[3])
+    {}
+
     template
     TVector4 operator+=(TVector4& v1, const TVector4& v2)
     {
@@ -159,19 +188,87 @@ namespace Phanes::Core::Math
         return Detail::compute_vec4_ieq::map(v1, v2);
     }
 
 
+
+
+    // Inc- / Decrement
+    template
+    TVector4& operator++(TVector4& v1)
+    {
+        ++v1.x;
+        ++v1.y;
+        ++v1.z;
+        ++v1.w;
 
-    // SIMD
+        return v1;
+    }
 
+    template
+    TVector4& operator--(TVector4& v1)
+    {
+        --v1.x;
+        --v1.y;
+        --v1.z;
+        --v1.w;
+
+        return v1;
+    }
+
+    template
+    TVector4& operator++(TVector4& v1, int)
+    {
+        return ++v1;
+    }
+
+    template
+    TVector4& operator--(TVector4& v1, int)
+    {
+        return --v1;
+    }
+
+
+    // SIMD constructor
 
     template<>
-    TVector4::TVector4(Real _x, Real _y, Real _z, Real _w) :
+    TVector4::TVector4(const TVector4& v)
+    {
+        this->comp = _mm_load_ps(reinterpret_cast(&v));
+    }
+
+    template<>
+    TVector4::TVector4(float _x, float _y, float _z, float _w) :
         x(_x),
         y(_y),
         z(_z),
         w(_w)
     {
-        this->comp = _mm_load_ps(reinterpret_cast(&x));
+        this->comp = _mm_load_ps(reinterpret_cast(&this->x));
+    }
+
+    template<>
+    TVector4::TVector4(float s)
+    {
+        this->comp = _mm_load_ps1(&s);
+    }
+
+    template<>
+    TVector4::TVector4(const TVector2& v1, const TVector2& v2) :
+        x(v1.x),
+        y(v1.y),
+        z(v2.x),
+        w(v2.y)
+    {
+        this->comp = _mm_load_ps(reinterpret_cast(&this->x));
+    }
+
+    template<>
+    TVector4::TVector4(const float* comp) :
+        x(comp[0]),
+        y(comp[1]),
+        z(comp[2]),
+        w(comp[3])
+    {
+        this->comp = _mm_load_ps(reinterpret_cast(&this->x));
     }
 
 }
\ No newline at end of file