From 64225c5830093b4a984b0a1a3f38698ac0bfd35a Mon Sep 17 00:00:00 2001 From: scorpioblood <77296181+scorpioblood@users.noreply.github.com> Date: Tue, 11 Jun 2024 21:53:22 +0200 Subject: [PATCH] SIMD improvement. --- .../public/Math/Detail/IntVector3Decl.inl | 2 - .../Source/Runtime/Core/public/Math/Include.h | 1 + .../Runtime/Core/public/Math/IntVector3.hpp | 3 +- .../Runtime/Core/public/Math/IntVector3.inl | 3 +- .../Runtime/Core/public/Math/IntVector4.hpp | 2 +- .../public/Math/SIMD/PhanesVectorMathFPU.hpp | 448 ++++++++++++++++++ .../public/Math/SIMD/PhanesVectorMathSSE.hpp | 381 +++++++++------ 7 files changed, 676 insertions(+), 164 deletions(-) diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/IntVector3Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/IntVector3Decl.inl index 1e4dbd0..ae81615 100644 --- a/Engine/Source/Runtime/Core/public/Math/Detail/IntVector3Decl.inl +++ b/Engine/Source/Runtime/Core/public/Math/Detail/IntVector3Decl.inl @@ -2,8 +2,6 @@ #include "Core/public/Math/Boilerplate.h" -#include "../IntVector3.hpp" - namespace Phanes::Core::Math::Detail { template diff --git a/Engine/Source/Runtime/Core/public/Math/Include.h b/Engine/Source/Runtime/Core/public/Math/Include.h index 3080630..460ac8c 100644 --- a/Engine/Source/Runtime/Core/public/Math/Include.h +++ b/Engine/Source/Runtime/Core/public/Math/Include.h @@ -4,4 +4,5 @@ #include "Core/public/Math/Vector2.hpp" // <-- Includes Vector3/4 automatically #include "Core/public/Math/IntVector2.hpp" +#include "Core/public/Math/IntVector3.hpp" #include "Core/public/Math/IntVector4.hpp" \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/IntVector3.hpp b/Engine/Source/Runtime/Core/public/Math/IntVector3.hpp index d56696f..248b718 100644 --- a/Engine/Source/Runtime/Core/public/Math/IntVector3.hpp +++ b/Engine/Source/Runtime/Core/public/Math/IntVector3.hpp @@ -73,7 +73,7 @@ namespace Phanes::Core::Math { TIntVector3(const T* comp); - TIntVector3(const TIntVector2& v1, const T s) + TIntVector3(const TIntVector2& v1, const T s); }; @@ -732,3 +732,4 @@ namespace Phanes::Core::Math { #endif // !INTVECTOR3_H +#include "Core/public/Math/IntVector3.inl" \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/IntVector3.inl b/Engine/Source/Runtime/Core/public/Math/IntVector3.inl index fcc7093..26daf56 100644 --- a/Engine/Source/Runtime/Core/public/Math/IntVector3.inl +++ b/Engine/Source/Runtime/Core/public/Math/IntVector3.inl @@ -7,7 +7,6 @@ #include "Core/public/Math/SIMD/PhanesSIMDTypes.h" -#include "IntVector3.hpp" namespace Phanes::Core::Math { @@ -20,7 +19,7 @@ namespace Phanes::Core::Math template TIntVector3::TIntVector3(const T _x, const T _y, const T _z) { - Detail::construct_ivec3::value>::map(*this, _x, _y, _z, _w); + Detail::construct_ivec3::value>::map(*this, _x, _y, _z); } template diff --git a/Engine/Source/Runtime/Core/public/Math/IntVector4.hpp b/Engine/Source/Runtime/Core/public/Math/IntVector4.hpp index 60e74d3..037277b 100644 --- a/Engine/Source/Runtime/Core/public/Math/IntVector4.hpp +++ b/Engine/Source/Runtime/Core/public/Math/IntVector4.hpp @@ -591,7 +591,7 @@ namespace Phanes::Core::Math { (v1.x > 0) ? 1 : -1, (v1.y > 0) ? 1 : -1, (v1.z > 0) ? 1 : -1, - (v1.w > 0) ? 1 : -1, + (v1.w > 0) ? 1 : -1 ); } diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp index c2794ea..853d748 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp @@ -495,4 +495,452 @@ namespace Phanes::Core::Math::Detail }; + // =============== // + // TIntVector4 // + // =============== // + + template<> + struct construct_ivec4 + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const TIntVector4& v2) + { + v1.x = v2.x; + v1.y = v2.y; + v1.z = v2.z; + v1.w = v2.w; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int s) + { + v1.x = s; + v1.y = s; + v1.z = s; + v1.w = s; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int x, int y, int z, int w) + { + v1.x = x; + v1.y = y; + v1.y = z; + v1.y = w; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const int* comp) + { + v1.x = comp[0]; + v1.y = comp[1]; + v1.z = comp[2]; + v1.w = comp[3]; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.x = v1.x; + r.y = v1.y; + r.x = v2.x; + r.y = v2.y; + } + }; + + template<> + struct compute_ivec4_add + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x + v2.x; + r.y = v1.y + v2.y; + r.z = v1.z + v2.z; + r.w = v1.w + v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x + s; + r.y = v1.y + s; + r.z = v1.z + s; + r.w = v1.w + s; + } + }; + + template<> + struct compute_ivec4_sub + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x - v2.x; + r.y = v1.y - v2.y; + r.z = v1.z - v2.z; + r.w = v1.w - v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x - s; + r.y = v1.y - s; + r.z = v1.z - s; + r.w = v1.w - s; + } + }; + + template<> + struct compute_ivec4_mul + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x * v2.x; + r.y = v1.y * v2.y; + r.z = v1.z * v2.z; + r.w = v1.w * v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x * s; + r.y = v1.y * s; + r.z = v1.z * s; + r.w = v1.w * s; + } + }; + + template<> + struct compute_ivec4_inc + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1) + { + r.x = v1.x + 1; + r.y = v1.y + 1; + r.z = v1.z + 1; + r.w = v1.w + 1; + } + }; + + template<> + struct compute_ivec4_dec + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1) + { + r.x = v1.x - 1; + r.y = v1.y - 1; + r.z = v1.z - 1; + r.w = v1.w - 1; + } + }; + + template<> + struct compute_ivec4_and + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x & v2.x; + r.y = v1.y & v2.y; + r.z = v1.z & v2.z; + r.w = v1.w & v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x & s; + r.y = v1.y & s; + r.z = v1.z & s; + r.w = v1.w & s; + } + }; + + template<> + struct compute_ivec4_or + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x | v2.x; + r.y = v1.y | v2.y; + r.z = v1.z | v2.z; + r.w = v1.w | v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x | s; + r.y = v1.y | s; + r.z = v1.z | s; + r.w = v1.w | s; + } + }; + + template<> + struct compute_ivec4_xor + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x ^ v2.x; + r.y = v1.y ^ v2.y; + r.z = v1.z ^ v2.z; + r.w = v1.w ^ v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x ^ s; + r.y = v1.y ^ s; + r.z = v1.z ^ s; + r.w = v1.w ^ s; + } + }; + + template<> + struct compute_ivec4_left_shift + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x << v2.x; + r.y = v1.y << v2.y; + r.z = v1.z << v2.z; + r.w = v1.w << v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x << s; + r.y = v1.y << s; + r.z = v1.z << s; + r.w = v1.w << s; + } + }; + + template<> + struct compute_ivec4_right_shift + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.x = v1.x >> v2.x; + r.y = v1.y >> v2.y; + r.z = v1.z >> v2.z; + r.w = v1.w >> v2.w; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.x = v1.x >> s; + r.y = v1.y >> s; + r.z = v1.z >> s; + r.w = v1.w >> s; + } + }; + + + // =============== // + // TIntVector3 // + // =============== // + + + template<> + struct construct_ivec3 + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, const TIntVector3& v2) + { + v1.x = v2.x; + v1.y = v2.y; + v1.z = v2.z; + v1.w = (T)0; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, int s) + { + v1.x = s; + v1.y = s; + v1.z = s; + v1.w = (T)0; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, int x, int y, int z) + { + v1.x = x; + v1.y = y; + v1.y = z; + v1.w = (T)0; + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, const int* comp) + { + v1.x = comp[0]; + v1.y = comp[1]; + v1.z = comp[2]; + v1.w = (T)0; + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& r, const Phanes::Core::Math::TIntVector2& v1, const int s) + { + v1.x = v2.x; + v1.y = v2.y; + v1.z = s; + v1.w = (T)0; + } + }; + + + template<> struct compute_ivec3_add : public compute_ivec4_add {}; + template<> struct compute_ivec3_sub : public compute_ivec4_sub {}; + template<> struct compute_ivec3_mul : public compute_ivec4_mul {}; + template<> struct compute_ivec3_div : public compute_ivec4_div {}; + template<> struct compute_ivec3_inc : public compute_ivec4_inc {}; + template<> struct compute_ivec3_dec : public compute_ivec4_dec {}; + + + template<> struct compute_ivec3_and : public compute_ivec4_and {}; + template<> struct compute_ivec3_or : public compute_ivec4_or {}; + template<> struct compute_ivec3_xor : public compute_ivec4_xor {}; + template<> struct compute_ivec3_left_shift : public compute_ivec4_left_shift {}; + template<> struct compute_ivec3_right_shift : public compute_ivec4_right_shift {}; + + + // =============== // + // TIntVector2 // + // =============== // + + template<> + struct construct_ivec2 + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& v1, const TIntVector2& v2) + { + v1.comp = _mm_setr_epi64x(v2.x, v2.y); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& v1, Phanes::Core::Types::int64 s) + { + v1.comp = _mm_set1_epi64x(s); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& v1, Phanes::Core::Types::int64 x, Phanes::Core::Types::int64 y) + { + v1.comp = _mm_setr_epi64x(x, y); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Types::int64* comp) + { + v1.comp = _mm_loadu_epi64(comp); + } + }; + + template<> + struct compute_ivec2_add + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_add_epi64(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_add_epi64(v1.comp, _mm_set1_epi64x(s)); + } + }; + + template<> + struct compute_ivec2_sub + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_sub_epi64(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_sub_epi64(v1.comp, _mm_set1_epi64x(s)); + } + }; + + template<> + struct compute_ivec2_inc + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1) + { + r.comp = _mm_add_epi64(v1.comp, _mm_set1_epi64x(1)); + } + }; + + template<> + struct compute_ivec2_dec + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1) + { + r.comp = _mm_sub_epi64(v1.comp, _mm_set1_epi64x(1)); + } + }; + + template<> + struct compute_ivec2_and + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_and_si128(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_and_si128(v1.comp, _mm_set1_epi64x(s)); + } + }; + + template<> + struct compute_ivec2_or + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_or_si128(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_or_si128(v1.comp, _mm_set1_epi64x(s)); + } + }; + + template<> + struct compute_ivec2_xor + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_xor_si128(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_xor_si128(v1.comp, _mm_set1_epi64x(s)); + } + }; + + template<> + struct compute_ivec2_left_shift + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_sll_epi64(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_sll_epi64(v1.comp, _mm_set1_epi64x(s)); + } + }; + + template<> + struct compute_ivec2_right_shift + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_srl_epi64(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, T s) + { + r.comp = _mm_srl_epi64(v1.comp, _mm_set1_epi64x(s)); + } + }; + } \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp index 31ea41f..440f5dd 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp @@ -349,12 +349,230 @@ namespace Phanes::Core::Math::Detail } }; + // =============== // + // TIntVector4 // + // =============== // + + template<> + struct construct_ivec4 + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const TIntVector4& v2) + { + v1.comp = _mm_setr_epi32(v2.x, v2.y, v2.z, v2.w); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int s) + { + v1.comp = _mm_set1_epi32(s); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int x, int y, int z, int w) + { + v1.comp = _mm_setr_epi32(x, y, z, w); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const int* comp) + { + v1.comp = _mm_loadu_epi32(comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) + { + r.comp = _mm_setr_epi32(v1.x, v1.y, v2.x, v2.y); + } + }; + + template<> + struct compute_ivec4_add + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_add_epi32(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_add_epi32(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_sub + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_sub_epi32(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_sub_epi32(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_mul + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_mul_epi32(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_mul_epi32(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_inc + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1) + { + r.comp = _mm_add_epi32(v1.comp, _mm_set1_epi32(1)); + } + }; + + template<> + struct compute_ivec4_dec + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1) + { + r.comp = _mm_sub_epi32(v1.comp, _mm_set1_epi32(1)); + } + }; + + template<> + struct compute_ivec4_and + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_and_si128(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_and_si128(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_or + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_or_si128(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_or_si128(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_xor + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_xor_si128(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_xor_si128(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_left_shift + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_sll_epi32(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_sll_epi32(v1.comp, _mm_set1_epi32(s)); + } + }; + + template<> + struct compute_ivec4_right_shift + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) + { + r.comp = _mm_srl_epi32(v1.comp, v2.comp); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s) + { + r.comp = _mm_srl_epi32(v1.comp, _mm_set1_epi32(s)); + } + }; + + + // =============== // + // TIntVector3 // + // =============== // + + + template<> + struct construct_ivec3 + { + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, const TIntVector3& v2) + { + v1.comp = _mm_setr_epi32(v2.x, v2.y, v2.z, v2.w); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, int s) + { + v1.comp = _mm_set1_epi32(s); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, int x, int y, int z) + { + v1.comp = _mm_setr_epi32(x, y, z, 0); + } + + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, const int* comp) + { + v1.comp = _mm_setr_epi32(comp[0], comp[1], comp[2], 0); + } + + static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& r, const Phanes::Core::Math::TIntVector2& v1, const int s) + { + r.comp = _mm_setr_epi32(v1.x, v1.y, s, 0); + } + }; + + + template<> struct compute_ivec3_add : public compute_ivec4_add {}; + template<> struct compute_ivec3_sub : public compute_ivec4_sub {}; + template<> struct compute_ivec3_mul : public compute_ivec4_mul {}; + template<> struct compute_ivec3_div : public compute_ivec4_div {}; + template<> struct compute_ivec3_inc : public compute_ivec4_inc {}; + template<> struct compute_ivec3_dec : public compute_ivec4_dec {}; + + + template<> struct compute_ivec3_and : public compute_ivec4_and {}; + template<> struct compute_ivec3_or : public compute_ivec4_or {}; + template<> struct compute_ivec3_xor : public compute_ivec4_xor {}; + template<> struct compute_ivec3_left_shift : public compute_ivec4_left_shift {}; + template<> struct compute_ivec3_right_shift : public compute_ivec4_right_shift {}; + + // =============== // // TIntVector2 // // =============== // template<> - struct construct_ivec2 + struct construct_ivec2 { static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& v1, const TIntVector2& v2) { @@ -380,7 +598,7 @@ namespace Phanes::Core::Math::Detail }; template<> - struct compute_ivec2_add + struct compute_ivec2_add { static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) { @@ -394,7 +612,7 @@ namespace Phanes::Core::Math::Detail }; template<> - struct compute_ivec2_sub + struct compute_ivec2_sub { static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) { @@ -408,7 +626,7 @@ namespace Phanes::Core::Math::Detail }; template<> - struct compute_ivec2_inc + struct compute_ivec2_inc { static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1) { @@ -417,7 +635,7 @@ namespace Phanes::Core::Math::Detail }; template<> - struct compute_ivec2_dec + struct compute_ivec2_dec { static FORCEINLINE void map(Phanes::Core::Math::TIntVector2& r, const Phanes::Core::Math::TIntVector2& v1) { @@ -494,157 +712,4 @@ namespace Phanes::Core::Math::Detail r.comp = _mm_srl_epi64(v1.comp, _mm_set1_epi64x(s)); } }; - - - // =============== // - // TIntVector4 // - // =============== // - - template<> - struct construct_ivec4 - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const TIntVector4& v2) - { - v1.comp = _mm_setr_epi32(v2.x, v2.y, v2.z, v2.w); - } - - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int s) - { - v1.comp = _mm_set1_epi32(s); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int x, int y, int z, int w) - { - v1.comp = _mm_setr_epi32(x, y, z, w); - } - - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const int* comp) - { - v1.comp = _mm_loadu_epi32(comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2) - { - r.comp = _mm_setr_epi32(v1.x, v1.y, v2.x, v2.y); - } - }; - - template<> - struct compute_ivec4_add - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_add_epi32(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_add_epi32(v1.comp, _mm_set1_epi32(s)); - } - }; - - template<> - struct compute_ivec4_sub - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_sub_epi32(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_sub_epi32(v1.comp, _mm_set1_epi32(s)); - } - }; - - template<> - struct compute_ivec4_inc - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1) - { - r.comp = _mm_add_epi32(v1.comp, _mm_set1_epi32(1)); - } - }; - - template<> - struct compute_ivec4_dec - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1) - { - r.comp = _mm_sub_epi32(v1.comp, _mm_set1_epi32(1)); - } - }; - - template<> - struct compute_ivec4_and - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_and_si128(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_and_si128(v1.comp, _mm_set1_epi32(s)); - } - }; - - template<> - struct compute_ivec4_or - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_or_si128(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_or_si128(v1.comp, _mm_set1_epi32(s)); - } - }; - - template<> - struct compute_ivec4_xor - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_xor_si128(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_xor_si128(v1.comp, _mm_set1_epi32(s)); - } - }; - - template<> - struct compute_ivec4_left_shift - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_sll_epi32(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_sll_epi32(v1.comp, _mm_set1_epi32(s)); - } - }; - - template<> - struct compute_ivec4_right_shift - { - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2) - { - r.comp = _mm_srl_epi32(v1.comp, v2.comp); - } - - static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, T s) - { - r.comp = _mm_srl_epi32(v1.comp, _mm_set1_epi32(s)); - } - }; - } \ No newline at end of file