diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl new file mode 100644 index 0000000..2352cea --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector4Decl.inl @@ -0,0 +1,167 @@ +#pragma once + +#include "Core/public/Math/Boilerplate.h" +#include "Core/public/Math/MathCommon.hpp" +#include "Core/public/Math/MathFwd.h" + +namespace Phanes::Core::Math::Detail +{ + template + struct compute_vec4_add {}; + + template + struct compute_vec4_sub {}; + + template + struct compute_vec4_mul {}; + + template + struct compute_vec4_div {}; + + template + struct compute_vec4_eq{}; + + template + struct compute_vec4_ieq {}; + + template + struct compute_vec4_inc {}; + + template + struct compute_vec4_dec {}; + + + template + struct compute_vec4_add + { + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x + v2.x; + r.y = v1.y + v2.y; + r.z = v1.z + v2.z; + r.w = v1.w + v2.w; + } + + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, T s) + { + r.x = v1.x + s; + r.y = v1.y + s; + r.z = v1.z + s; + r.w = v1.w + s; + } + }; + + + template + struct compute_vec4_sub + { + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x - v2.x; + r.y = v1.y - v2.y; + r.z = v1.z - v2.z; + r.w = v1.w - v2.w; + } + + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, T s) + { + r.x = v1.x - s; + r.y = v1.y - s; + r.z = v1.z - s; + r.w = v1.w - s; + } + }; + + + template + struct compute_vec4_mul + { + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x * v2.x; + r.y = v1.y * v2.y; + r.z = v1.z * v2.z; + r.w = v1.w * v2.w; + } + + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, T s) + { + r.x = v1.x * s; + r.y = v1.y * s; + r.z = v1.z * s; + r.w = v1.w * s; + } + }; + + + template + struct compute_vec4_div + { + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.x = v1.x / v2.x; + r.y = v1.y / v2.y; + r.z = v1.z / v2.z; + r.w = v1.w / v2.w; + } + + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, T s) + { + s = (T)1.0 / s; + + r.x = v1.x * s; + r.y = v1.y * s; + r.z = v1.z * s; + r.w = v1.w * s; + } + }; + + template + struct compute_vec4_eq + { + static constexpr bool map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.z - v2.z) < P_FLT_INAC && + Phanes::Core::Math::Abs(v1.w - v2.w) < P_FLT_INAC); + } + }; + + template + struct compute_vec4_ieq + { + static constexpr bool map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + return (Phanes::Core::Math::Abs(v1.x - v2.x) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.y - v2.y) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.z - v2.z) > P_FLT_INAC || + Phanes::Core::Math::Abs(v1.w - v2.w) > P_FLT_INAC); + } + }; + + template + struct compute_vec4_inc + { + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1) + { + r.x = v1.x + 1; + r.y = v1.y + 1; + r.z = v1.z + 1; + r.w = v1.w + 1; + } + }; + + template + struct compute_vec4_dec + { + static constexpr void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1) + { + r.x = v1.x - 1; + r.y = v1.y - 1; + r.z = v1.z - 1; + r.w = v1.w - 1; + } + }; +} + diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h new file mode 100644 index 0000000..b614e0c --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMDTypes.h @@ -0,0 +1,227 @@ +// This file includes the necessary header for vectorization intrinsics. If no specifics are defined SSE4.2 is used. +// +// ARM is not supported. + +#include "Core/public/Math/SIMD/Platform.h" +#include "Core/public/Math/MathTypes.h" + +#if P_INTRINSICS == P_INTRINSICS_AVX2 +# include +#elif P_INTRINSICS == P_INTRINSICS_AVX +# include +#elif P_INTRINSICS == P_INTRINSICS_SSE +# include +#elif P_INTRINSICS == P_INTRINSICS_NEON +# include "neon.h" // <- Not supported +#endif + +// use_simd for metaprogramming +namespace Phanes::Core::Math::SIMD +{ + + /// + /// This decides, whether simd operations should be used, based on the vector type, it's size, the vector alignment and whether the right extension can be loaded during compiletime. + /// + /// Type of vector + /// Length of vector + /// Whether SIMD intrinsics exist, that support the vector type and length. + /// Whether the vector is aligned for simd usage. + template + struct use_simd + { + bool value = false; + }; + + + // SSE / NEON + + template<> + struct use_simd + { + bool value = true && (P_SSE__ || P_NEON__); + }; + + template<> + struct use_simd + { + bool value = true && (P_SSE__ || P_NEON__); + }; + + template<> + struct use_simd + { + bool value = true && (P_SSE__ || P_NEON__); + }; + + template<> + struct use_simd + { + bool value = true && (P_SSE__ || P_NEON__); + }; + + template<> + struct use_simd + { + bool value = true && (P_SSE__ || P_NEON__); + }; + + template<> + struct use_simd + { + bool value = true && (P_SSE__ || P_NEON__); + }; + + // SSE + + template<> + struct use_simd + { + bool value = true && P_SSE__; + }; + + template<> + struct use_simd + { + bool value = true && P_SSE__; + }; + + template<> + struct use_simd + { + bool value = true && P_SSE__; + }; + + + + // AVX + + template<> + struct use_simd + { + bool value = true && P_AVX__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX__; + }; + + + // AVX2 + + template<> + struct use_simd + { + bool value = true && P_AVX2__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX2__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX2__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX2__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX2__; + }; + + template<> + struct use_simd + { + bool value = true && P_AVX2__; + }; +} + +// Register aliases +namespace Phanes::Core::Types +{ + +#if P_INTRINSICS >= 1 + + typedef __m128 Vec4f32Reg; + typedef __m128d Vec2f64Reg; + + typedef __m128i Vec4i32Reg; + typedef __m128i Vec2i64Reg; + + typedef __m128i Vec4u32Reg; + typedef __m128i Vec2u64Reg; + +#elif P_INTRINSICS != P_INTRINSICS_NEON + + typedef struct alignas(16) Vec4f32Reg { float data[4]; } Vec4f32Reg; + typedef struct alignas(16) Vec2f64Reg { double data[2]; } Vec2f64Reg; + typedef struct alignas(16) Vec4i32Reg { int data[4]; } Vec4i32Reg; + typedef struct alignas(16) Vec2i64Reg { Phanes::Core::Types::int64 data[2]; } Vec2i64Reg; + typedef struct alignas(16) Vec4u32Reg { unsigned int data[4]; } Vec4u32Reg; + typedef struct alignas(16) Vec2u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec2u64Reg; + +#endif + + +#if P_INTRINSICS >= 2 + + typedef __m256 Vec4x2f32Reg; + typedef __m256 Vec8f32Reg; + typedef __m256d Vec2x2f64Reg; + typedef __m256d Vec4f64Reg; + +#elif P_INTRINSICS != P_INTRINSICS_NEON + + typedef struct alignas(32) Vec4x2f32Reg { float data[8]; } Vec4x2f32Reg; + typedef struct alignas(32) Vec8f32Reg { float data[8]; } Vec8f32Reg; + typedef struct alignas(32) Vec2x2f64Reg { double data[4]; } Vec2x2f64Reg; + typedef struct alignas(32) Vec4f64Reg { double data[4]; } Vec4f64Reg; + +#endif + + +#if P_INTRINSICS == 3 + + typedef __m256i Vec4x2i32Reg; + typedef __m256i Vec8i32Reg; + typedef __m256i Vec2x2i64Reg; + typedef __m256i Vec4i64Reg; + + typedef __m256i Vec4x2u32Reg; + typedef __m256i Vec8u32Reg; + typedef __m256i Vec2x2u64Reg; + typedef __m256i Vec4u64Reg; + +#elif P_INTRINSICS != P_INTRINSICS_NEON + + typedef struct alignas(32) Vec4x2i32Reg { int data[8]; } Vec4x2i32Reg; + typedef struct alignas(32) Vec8i32Reg { int data[8]; } Vec8i32Reg; + typedef struct alignas(32) Vec2x2i64Reg { Phanes::Core::Types::int64 data[4]; } Vec2x2i64Reg; + typedef struct alignas(32) Vec4i64Reg { Phanes::Core::Types::int64 data[4]; } Vec4i64Reg; + + typedef struct alignas(32) Vec4x2u32Reg { unsigned int data[8]; } Vec4x2u32Reg; + typedef struct alignas(32) Vec8u32Reg { unsigned int data[8]; } Vec8u32Reg; + typedef struct alignas(32) Vec2x2u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec2x2u64Reg; + typedef struct alignas(32) Vec4u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec4u64Reg; + +#endif + + // NEON ... +} \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp index 6f70f09..1a370f1 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp @@ -1 +1 @@ -#pragma once +#pragma once \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp index 4249084..5901cb3 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp @@ -1,5 +1,25 @@ #pragma once +#include "Core/public/Math/Boilerplate.h" #include +// -> For IntelliSense +// ============ // +// TVector4 // +// ============ // + + +namespace Phanes::Core::Math::Detail +{ + // Template class has already been defined and is included through: Storage.h -> Vector4.hpp -> SIMDIntrinsics.h -> PhanesVectorMathSEE.hpp + + template<> + struct compute_vec4_add + { + static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2) + { + r.comp = _mm_add_ps(v1.comp, v2.comp); + } + }; +} \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h index 0b9e65a..c15b890 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h @@ -254,6 +254,11 @@ // Define also supported instruction sets for Visual Studio, as it only defines the latest (e.g. only __AVX__ not __SSE4__ ...). +#define P_AVX2__ 0 +#define P_AVX__ 0 +#define P_SSE__ 0 +#define P_NEON__ 0 + #ifdef P_FORCE_INTRINSICS # undef __AVX2__ @@ -267,22 +272,22 @@ #else # ifdef __AVX2__ -# define P_AVX2__ +# define P_AVX2__ 1 # elif defined(__AVX__) -# define P_AVX__ +# define P_AVX__ 1 # elif defined(__SSE__) -# define P_SSE__ +# define P_SSE__ 1 # endif #endif // !P_FORCE_INTRINSICS #ifdef P_AVX2__ -# define P_AVX__ +# define P_AVX__ 1 #endif #ifdef P_AVX__ -# define P_SSE__ +# define P_SSE__ 1 #endif @@ -300,14 +305,15 @@ # undef P_SSE__ # undef P_SSE__ #else -# if defined(P_AVX__) && !defined(P_AVX2__) -# define P_INTRINSICS P_INTRINSICS_AVX -# elif defined(P_AVX2__) -# define P_INTRINSICS P_INTRINSICS_AVX2 -# elif (defined(__SSE__) || defined(P_SSE__)) && !defined(P_AVX__) -# define P_INTRINSICS P_INTRINSICS_SSE +# if (P_AVX__ == 1) && (P_AVX2__ == 0) +# define P_INTRINSICS P_INTRINSICS_AVX +# elif P_AVX2__ == 1 +# define P_INTRINSICS P_INTRINSICS_AVX2 +# elif P_SSE__ == 1 +# define P_INTRINSICS P_INTRINSICS_SSE # elif defined(P_ARM_ARCH) -# define P_INTRINSICS P_INTRINSICS_NEON +# define P_INTRINSICS P_INTRINSICS_NEON +# define P_NEON__ 1 # elif !defined(P_FORCE_INTRINSICS) # error No SIMD instruction set detected. Use P_FORCE_FPU to disable SIMD extensions. # endif diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/SIMDIntrinsics.h b/Engine/Source/Runtime/Core/public/Math/SIMD/SIMDIntrinsics.h new file mode 100644 index 0000000..6784627 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/SIMDIntrinsics.h @@ -0,0 +1,17 @@ +#pragma once + + +#include "Core/public/Math/SIMD/Platform.h" + +#if P_INTRINSICS == P_INTRINSICS_AVX2 +# include "PhanesVectorMathAVX2.hpp" +#elif P_INTRINSICS == P_INTRINSICS_AVX +# include "PhanesVectorMathAVX.hpp" +#elif P_INTRINSICS == P_INTRINSICS_SSE +# include "PhanesVectorMathSSE.hpp" +#elif P_INTRINSICS == P_INTRINSICS_NEON +# include "PhanesVectorMathNeon.hpp" +#elif P_INTRINSICS == P_INTRINSICS_FPU +# include "PhanesVectorMathFPU.hpp" +#endif + diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h b/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h index fa2b854..f46ce25 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h @@ -1,13 +1,13 @@ // Defines on compile time, whether a xmm register or an array should be used. #pragma once -#include "Core/public/Math/SIMD/PhanesSIMD.h" +#include "Core/public/Math/SIMD/PhanesSIMDTypes.h" #include "Core/public/Math/MathTypes.h" namespace Phanes::Core::SIMD { - template + template struct Storage; // General unaligned memory storage @@ -142,38 +142,4 @@ namespace Phanes::Core::SIMD { typedef Phanes::Core::Types::Vec4x2u32Reg type; }; -} - - -struct Vec4 -{ -public: - union - { - struct - { - - int x, y, z, w; - - }; - - typename Phanes::Core::SIMD::Storage<4, Phanes::Core::Types::int32, true>::type comp; - }; -}; - -struct Vec4x2 -{ -public: - union - { - struct - { - - Vec4 v1; - Vec4 v2; - - }; - - typename Phanes::Core::SIMD::Storage<8, Phanes::Core::Types::int32, true>::type comp; - }; -}; \ No newline at end of file +} \ No newline at end of file