diff --git a/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathFPU.hpp b/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathFPU.hpp
index 3dde2f6..09f6f30 100644
--- a/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathFPU.hpp
+++ b/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathFPU.hpp
@@ -1,74 +1,80 @@
#pragma once
-#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
-#include "Core/public/Math/MathCommon.hpp"
-
+#include "Core/Math/MathCommon.hpp"
+#include "Core/Math/SIMD/PhanesSIMDTypes.h"
namespace Phanes::Core::Math::SIMD
{
- ///
- /// Adds all scalars of the vector.
- ///
- /// Vector
- /// Sum stored in v[0:31].
- Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
- {
- Phanes::Core::Types::Vec4f32Reg r;
- r.data[0] = v.data[0] + v.data[1] + v.data[2] + v.data[3];
- }
-
- ///
- /// Adds all scalars of the vector.
- ///
- /// Vector
- /// Sum of components.
- float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
- {
- return v.data[0] + v.data[1] + v.data[2] + v.data[3];
- }
-
- ///
- /// Gets the absolute value of each scalar in the vector.
- ///
- /// Vector
- /// Vector with all components positive.
- Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
- {
- Phanes::Core::Types::Vec4f32Reg r;
+ /// Adds all scalars of the vector using plain scalar arithmetic.
+ ///
+ /// @param v Vector whose four components are summed.
+ /// @return Register with the sum stored in data[0] (v[0:31]); the other
+ ///         components are value-initialized.
+ Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
+ {
+     // Value-initialize so the components that are not written below are not indeterminate.
+     Phanes::Core::Types::Vec4f32Reg r{};
+     r.data[0] = v.data[0] + v.data[1] + v.data[2] + v.data[3];
+
+     // Flowing off the end of a non-void function is undefined behaviour, so return explicitly.
+     return r;
+ }
- r.data[0] = Abs(v.data[0]);
- }
-
- ///
- /// Gets the dot product of the
- ///
- ///
- ///
- ///
- Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
- {
- Phanes::Core::Types::Vec4f32Reg r;
- r.data[0] = v1.data[0] * v1.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3];
+ /// Sums the four components of the vector and hands the result back as a float.
+ ///
+ /// @param v Vector whose components are added together.
+ /// @return data[0] + data[1] + data[2] + data[3] of v.
+ float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
+ {
+     // Alias the component storage; the additions keep the left-to-right order.
+     const auto& c = v.data;
+     return c[0] + c[1] + c[2] + c[3];
+ }
- return r;
- }
-
- ///
- /// Gets the dot product of the
- ///
- ///
- ///
- ///
- float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
- {
- return v1.data[0] * v1.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3];
- }
+ /// Gets the absolute value of each scalar in the vector.
+ ///
+ /// @param v Vector whose components are passed through Abs().
+ /// @return Vector holding Abs() of each of the four components of v.
+ Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
+ {
+     Phanes::Core::Types::Vec4f32Reg r{};
- Phanes::Core::Types::Vec2f64Reg vec2_eq(const Phanes::Core::Types::Vec2f64Reg v1, const Phanes::Core::Types::Vec2f64Reg v2)
- {
- Phanes::Core::Types::Vec4f64Reg r;
+     // Handle every component, not just data[0], so the result matches the
+     // "each scalar" contract stated above.
+     for (int i = 0; i < 4; ++i)
+     {
+         r.data[i] = Abs(v.data[i]);
+     }
+
+     // Flowing off the end of a non-void function is undefined behaviour, so return explicitly.
+     return r;
+ }
- r.data[0] = (Phanes::Core::Math::Abs(v1.data[0] - v2.data[0]) < P_FLT_INAC) ? 0xFFFFFFFF : 0;
- r.data[1] = (Phanes::Core::Math::Abs(v1.data[1] - v2.data[1]) < P_FLT_INAC) ? 0xFFFFFFFF : 0;
- }
-}
\ No newline at end of file
+ /// Gets the dot product of the two vectors.
+ ///
+ /// @param v1 First vector.
+ /// @param v2 Second vector.
+ /// @return Register with the dot product stored in data[0]; the other
+ ///         components are value-initialized.
+ Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1,
+                                          const Phanes::Core::Types::Vec4f32Reg v2)
+ {
+     // Value-initialize so the components that are not written below are not indeterminate.
+     Phanes::Core::Types::Vec4f32Reg r{};
+
+     // Every term pairs a component of v1 with the same component of v2
+     // (the first term must not square v1.data[0]).
+     r.data[0] = v1.data[0] * v2.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] +
+                 v1.data[3] * v2.data[3];
+
+     return r;
+ }
+
+ /// Gets the dot product of the two vectors as a float.
+ ///
+ /// @param v1 First vector.
+ /// @param v2 Second vector.
+ /// @return Sum of the component-wise products of v1 and v2.
+ float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1,
+                       const Phanes::Core::Types::Vec4f32Reg v2)
+ {
+     // Every term pairs a component of v1 with the same component of v2
+     // (the first term must not square v1.data[0]).
+     return v1.data[0] * v2.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] +
+            v1.data[3] * v2.data[3];
+ }
+
+ /// Compares two 2-component vectors component-wise with a tolerance.
+ ///
+ /// Two components count as equal when the absolute value of their difference
+ /// is below P_FLT_INAC.
+ ///
+ /// @param v1 First vector.
+ /// @param v2 Second vector.
+ /// @return Vector whose component i is 0xFFFFFFFF (converted to the component type)
+ ///         when component i of v1 and v2 are equal within the tolerance, otherwise 0.
+ Phanes::Core::Types::Vec2f64Reg vec2_eq(const Phanes::Core::Types::Vec2f64Reg v1,
+                                         const Phanes::Core::Types::Vec2f64Reg v2)
+ {
+     // The result must have the declared return type (was declared as Vec4f64Reg).
+     Phanes::Core::Types::Vec2f64Reg r{};
+
+     // NOTE(review): 0xFFFFFFFF is stored as a numeric value here, not as an
+     // all-bits-set lane mask -- confirm that this is what callers expect.
+     r.data[0] =
+         (Phanes::Core::Math::Abs(v1.data[0] - v2.data[0]) < P_FLT_INAC) ? 0xFFFFFFFF : 0;
+     r.data[1] =
+         (Phanes::Core::Math::Abs(v1.data[1] - v2.data[1]) < P_FLT_INAC) ? 0xFFFFFFFF : 0;
+
+     // Flowing off the end of a non-void function is undefined behaviour, so return explicitly.
+     return r;
+ }
+} // namespace Phanes::Core::Math::SIMD
diff --git a/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathSSE.hpp
index 8d1bd00..125a96b 100644
--- a/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathSSE.hpp
+++ b/Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathSSE.hpp
@@ -1,11 +1,9 @@
#pragma once
-#include
+#include
-#include "Core/Math/SIMD/PhanesSIMDTypes.h"
#include "Core/Math/Boilerplate.h"
-#include "Core/Math/MathCommon.hpp"
-
+#include "Core/Math/SIMD/PhanesSIMDTypes.h"
#include "Core/Math/Vector2.hpp"
#include "Core/Math/Vector3.hpp"
@@ -20,1490 +18,1686 @@
#include "Core/Math/Matrix3.hpp"
#include "Core/Math/Matrix4.hpp"
-
-
// ========== //
// Common //
// ========== //
#ifndef PHANES_VECTOR_MATH_SSE_HPP
-#define PHANES_VECTOR_MATH_SSE_HPP
+# define PHANES_VECTOR_MATH_SSE_HPP
namespace Phanes::Core::Math::SIMD
{
- Phanes::Core::Types::Vec4f32Reg vec4_cross_p(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
- {
- __m128 tmp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
- __m128 tmp1 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
- __m128 tmp2 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
- __m128 tmp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
- return _mm_sub_ps(
- _mm_mul_ps(tmp0, tmp1),
- _mm_mul_ps(tmp2, tmp3)
- );
- }
+ /// Computes the cross product of the first three lanes of two registers.
+ ///
+ /// @param v1 Left operand.
+ /// @param v2 Right operand.
+ /// @return Lane-wise v1.yzx * v2.zxy - v1.zxy * v2.yzx; lane 3 is the
+ ///         difference of two identical w1 * w2 products.
+ Phanes::Core::Types::Vec4f32Reg vec4_cross_p(const Phanes::Core::Types::Vec4f32Reg v1,
+                                              const Phanes::Core::Types::Vec4f32Reg v2)
+ {
+     // Rotated copies of the operands: (y, z, x, w) and (z, x, y, w).
+     const __m128 lhsYzx = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
+     const __m128 rhsZxy = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
+     const __m128 lhsZxy = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
+     const __m128 rhsYzx = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
+
+     const __m128 minuend    = _mm_mul_ps(lhsYzx, rhsZxy);
+     const __m128 subtrahend = _mm_mul_ps(lhsZxy, rhsYzx);
+     return _mm_sub_ps(minuend, subtrahend);
+ }
+ /// Adds all scalars of the vector.
+ ///
+ /// @param v Vector whose four lanes are summed.
+ /// @return Register with the sum stored in v[0:31]; with this reduction every
+ ///         lane ends up holding the same total.
+ Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
+ {
+     // Swap neighbours: (v1, v0, v3, v2).
+     const __m128 swapped = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1));
+     // Pair sums: (v0+v1, v0+v1, v2+v3, v2+v3).
+     const __m128 pairs = _mm_add_ps(swapped, v);
+     // Swap the two halves: (v2+v3, v2+v3, v0+v1, v0+v1).
+     const __m128 crossed = _mm_shuffle_ps(pairs, pairs, _MM_SHUFFLE(1, 0, 3, 2));
+
+     // The second add has to combine the two pair sums. Adding the input v
+     // again would leave lane 0 as v0 + v2 + v3 and drop v1.
+     return _mm_add_ps(crossed, pairs);
+ }
- ///
- /// Adds all scalars of the vector.
- ///
- /// Vector
- /// Sum stored in v[0:31].
- Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
- {
- __m128 t = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1));
- t = _mm_add_ps(t, v);
- t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(1, 0, 3, 2));
- return _mm_add_ps(t, v);
- }
+ /// Adds all scalars of the vector and returns the sum as a float.
+ ///
+ /// @param v Vector whose four lanes are summed.
+ /// @return Sum of components.
+ float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
+ {
+     // Swap neighbours: (v1, v0, v3, v2).
+     const __m128 swapped = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1));
+     // Pair sums: (v0+v1, v0+v1, v2+v3, v2+v3).
+     const __m128 pairs = _mm_add_ps(swapped, v);
+     // Swap the two halves: (v2+v3, v2+v3, v0+v1, v0+v1).
+     const __m128 crossed = _mm_shuffle_ps(pairs, pairs, _MM_SHUFFLE(1, 0, 3, 2));
+
+     // The second add has to combine the two pair sums. Adding the input v
+     // again would leave lane 0 as v0 + v2 + v3 and drop v1.
+     const __m128 total = _mm_add_ps(crossed, pairs);
+
+     // Extract lane 0.
+     return _mm_cvtss_f32(total);
+ }
- ///
- /// Adds all scalars of the vector.
- ///
- /// Vector
- /// Sum of components.
- float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
- {
- __m128 t = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1));
- t = _mm_add_ps(t, v);
- t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(1, 0, 3, 2));
- t = _mm_add_ps(t, v);
- return _mm_cvtss_f32(t);
- }
+ /// Computes the absolute value of every lane of the vector.
+ ///
+ /// @param v Input vector.
+ /// @return v with the most significant bit of each 32-bit lane cleared.
+ Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
+ {
+     // 0x7FFFFFFF keeps every bit except the top (sign) bit of a 32-bit lane.
+     const __m128i magnitudeBits = _mm_set1_epi32(0x7FFFFFFF);
+     const __m128  magnitudeMask = _mm_castsi128_ps(magnitudeBits);
+     return _mm_and_ps(v, magnitudeMask);
+ }
- ///
- /// Gets the absolute value of each scalar in the vector.
- ///
- /// Vector
- /// Vector with all components positive.
- Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
- {
- return _mm_and_ps(v, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
- }
-
- ///
- /// Gets the dot product of the
- ///
- ///
- ///
- ///
- Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
- {
- return vec4_hadd(_mm_mul_ps(v1, v2));
- }
-
- ///
- /// Gets the dot product of the
- ///
- ///
- ///
- ///
- float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
- {
- return vec4_hadd_cvtf32(_mm_mul_ps(v1, v2));
- }
-
- Phanes::Core::Types::Vec2f64Reg vec2_eq(const Phanes::Core::Types::Vec2f64Reg v1, const Phanes::Core::Types::Vec2f64Reg v2)
- {
- return _mm_cmpeq_pd(v1, v2);
- }
-
- ///
- /// Sets the last component of the register to zero.
- /// The last component could hold unexpected values.
- ///
- ///
- void vec3_fix(Phanes::Core::Types::Vec4f32Reg v1)
- {
- v1 = _mm_and_ps(v1, _mm_castsi128_ps(_mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000)));
- }
-}
+ /// Gets the dot product of the two vectors.
+ ///
+ /// Multiplies the operands lane by lane and reduces the products with vec4_hadd.
+ ///
+ /// @param v1 First vector.
+ /// @param v2 Second vector.
+ /// @return Result of vec4_hadd applied to the lane-wise products.
+ Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1,
+                                          const Phanes::Core::Types::Vec4f32Reg v2)
+ {
+     const __m128 products = _mm_mul_ps(v1, v2);
+     return vec4_hadd(products);
+ }
+ /// Gets the dot product of the two vectors as a float.
+ ///
+ /// Multiplies the operands lane by lane and reduces the products with vec4_hadd_cvtf32.
+ ///
+ /// @param v1 First vector.
+ /// @param v2 Second vector.
+ /// @return Result of vec4_hadd_cvtf32 applied to the lane-wise products.
+ float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1,
+                       const Phanes::Core::Types::Vec4f32Reg v2)
+ {
+     const __m128 products = _mm_mul_ps(v1, v2);
+     return vec4_hadd_cvtf32(products);
+ }
+ /// Compares two 2-component double vectors lane by lane.
+ ///
+ /// @param v1 First vector.
+ /// @param v2 Second vector.
+ /// @return Result of _mm_cmpeq_pd(v1, v2), i.e. a per-lane equality mask.
+ Phanes::Core::Types::Vec2f64Reg vec2_eq(const Phanes::Core::Types::Vec2f64Reg v1,
+                                         const Phanes::Core::Types::Vec2f64Reg v2)
+ {
+     const __m128d laneMask = _mm_cmpeq_pd(v1, v2);
+     return laneMask;
+ }
+ /// Sets the last component of the register to zero.
+ /// The last component could hold unexpected values.
+ ///
+ /// @param v1 Register to fix in place: lanes 0-2 are kept, lane 3 is cleared.
+ ///           Taken by reference so the caller's register is actually modified;
+ ///           a by-value parameter makes the assignment below invisible to the caller.
+ void vec3_fix(Phanes::Core::Types::Vec4f32Reg& v1)
+ {
+     // _mm_setr_epi32 lists lanes from lowest to highest, so the zero lands on
+     // lane 3. (_mm_set_epi32 lists them highest-first; passing the zero as its
+     // last argument clears lane 0 instead.)
+     const __m128i keepFirstThree = _mm_setr_epi32(-1, -1, -1, 0);
+     v1 = _mm_and_ps(v1, _mm_castsi128_ps(keepFirstThree));
+ }
+} // namespace Phanes::Core::Math::SIMD
// ============ //
// TVector4 //
// ============ //
-
namespace Phanes::Core::Math::Detail
{
- // Template class has already been defined and is included through: Storage.h -> Vector4.hpp -> SIMDIntrinsics.h -> PhanesVectorMathSEE.hpp
-
-
- template<>
- struct construct_vec4
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const TVector4& v2)
- {
- v1.comp = v2.comp;
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, float s)
- {
- v1.comp = _mm_set_ps1(s);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, float x, float y, float z, float w)
- {
- v1.comp = _mm_setr_ps(x, y, z, w);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector2& v2, const Phanes::Core::Math::TVector2& v3)
- {
- v1.comp = _mm_set_ps(v2.x, v2.y, v3.x, v3.y);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, const float* s)
- {
- v1.comp = _mm_loadu_ps(s);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector3& v, float w)
- {
- r.comp = _mm_set_ps(w, v.z, v.y, v.x);
- }
- };
-
- template<>
- struct move_vec4
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, Phanes::Core::Math::TVector4&& v)
- {
- r.data = v.data;
- v.data = _mm_setzero_ps();
- }
- };
-
- template<>
- struct compute_vec4_add
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- r.comp = _mm_add_ps(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s)
- {
- r.comp = _mm_add_ps(v1.comp, _mm_set_ps1(s));
- }
- };
-
- template<>
- struct compute_vec4_sub
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- r.comp = _mm_sub_ps(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s)
- {
- r.comp = _mm_sub_ps(v1.comp, _mm_set_ps1(s));
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, float s, const Phanes::Core::Math::TVector4& v1)
- {
- r.comp = _mm_sub_ps(_mm_set_ps1(s), v1.comp);
- }
- };
-
- template<>
- struct compute_vec4_mul
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- r.comp = _mm_mul_ps(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s)
- {
- r.comp = _mm_mul_ps(v1.comp, _mm_set_ps1(s));
- }
- };
-
- template<>
- struct compute_vec4_div
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- r.comp = _mm_div_ps(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, float s)
- {
- r.comp = _mm_div_ps(v1.comp, _mm_set_ps1(s));
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, float s, const Phanes::Core::Math::TVector4& v1)
- {
- r.comp = _mm_div_ps(_mm_set_ps1(s), v1.comp);
- }
- };
-
- template<>
- struct compute_vec4_inc
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1)
- {
- r.comp = _mm_add_ps(v1.comp, _mm_set_ps1(1.0f));
- }
- };
-
- template<>
- struct compute_vec4_dec
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1)
- {
- r.comp = _mm_sub_ps(v1.comp, _mm_set_ps1(1.0f));
- }
- };
-
- template<>
- struct compute_vec4_mag
- {
- static FORCEINLINE float map(const Phanes::Core::Math::TVector4& v1)
- {
- __m128 tmp = _mm_mul_ps(v1.data, v1.data);
- return _mm_cvtss_f32(_mm_sqrt_ps(SIMD::vec4_hadd(tmp)));
- }
- };
-
- template<>
- struct compute_vec4_dotp
- {
- static FORCEINLINE float map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- return SIMD::vec4_dot_cvtf32(v1.data, v2.data);
- }
- };
-
- template<>
- struct compute_vec4_set
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, float x, float y, float z, float w)
- {
- v1.data = _mm_setr_ps(x, y, z, w);
- }
- };
-
- template<>
- struct compute_vec4_max
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- r.data = _mm_max_ps(v1.data, v2.data);
- }
- };
-
- template<>
- struct compute_vec4_min
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
- {
- r.data = _mm_min_ps(v1.data, v2.data);
- }
- };
-
- template<>
- struct compute_vec4_pdiv
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1)
- {
- r.data = _mm_div_ps(v1.data, _mm_set_ps1(v1.w));
- r.w = 0.0f;
- }
- };
-
-
- // ============ //
- // TVector3 //
- // ============ //
-
-
- template<>
- struct construct_vec3
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const TVector3& v2)
- {
- v1.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, float s)
- {
- v1.comp = _mm_set_ps1(s);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, float x, float y, float z)
- {
- v1.comp = _mm_setr_ps(x, y, z, 0.0f);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector2& v2, float s)
- {
- v1.comp = _mm_set_ps(v2.x, v2.y, s, 0.0f);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, const float* s)
- {
- v1.comp = _mm_setr_ps(s[0], s[1], s[2], 0.0f);
-
- }
- };
-
- template<>
- struct move_vec3
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& r, Phanes::Core::Math::TVector3&& v)
- {
- r.data = v.data;
- v.data = _mm_setzero_ps();
- }
- };
-
- template<>
- struct compute_vec3_set
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, float x, float y, float z)
- {
- v1.data = _mm_setr_ps(x, y, z, 0.0f);
- }
- };
-
- template<> struct compute_vec3_add : public compute_vec4_add {};
- template<> struct compute_vec3_sub : public compute_vec4_sub {};
- template<> struct compute_vec3_mul : public compute_vec4_mul {};
- template<> struct compute_vec3_div : public compute_vec4_div {};
- template<> struct compute_vec3_inc : public compute_vec4_inc {};
- template<> struct compute_vec3_dec : public compute_vec4_dec {};
- template<> struct compute_vec3_mag : public compute_vec4_mag {};
- template<> struct compute_vec3_dotp : public compute_vec4_dotp {};
- template<> struct compute_vec3_max : public compute_vec4_max {};
- template<> struct compute_vec3_min : public compute_vec4_min {};
-
- template<>
- struct compute_vec3_cross_p
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector3& r, const Phanes::Core::Math::TVector3& v1, const Phanes::Core::Math::TVector3& v2)
- {
- r.data = Phanes::Core::Math::SIMD::vec4_cross_p(v1.data, v2.data);
- }
- };
-
- // ============ //
- // TVector2 //
- // ============ //
-
-
- template<>
- struct construct_vec2
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, const TVector2& v2)
- {
- v1.comp = _mm_setr_pd(v2.x, v2.y);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double s)
- {
- v1.comp = _mm_set_pd1(s);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double x, double y)
- {
- v1.comp = _mm_setr_pd(x, y);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, const double* s)
- {
- v1.comp = _mm_loadu_pd(s);
-
- }
- };
-
-
- template<>
- struct compute_vec2_add
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
- {
- r.comp = _mm_add_pd(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s)
- {
- r.comp = _mm_add_pd(v1.comp, _mm_set1_pd(s));
- }
- };
-
- template<>
- struct compute_vec2_sub
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
- {
- r.comp = _mm_sub_pd(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s)
- {
- r.comp = _mm_sub_pd(v1.comp, _mm_set1_pd(s));
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, double s, const Phanes::Core::Math::TVector2& v1)
- {
- r.comp = _mm_sub_pd(_mm_set1_pd(s), v1.comp);
- }
- };
-
- template<>
- struct compute_vec2_mul
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
- {
- r.comp = _mm_mul_pd(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s)
- {
- r.comp = _mm_mul_pd(v1.comp, _mm_set1_pd(s));
- }
- };
-
- template<>
- struct compute_vec2_div
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
- {
- r.comp = _mm_div_pd(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, double s)
- {
- r.comp = _mm_div_pd(v1.comp, _mm_set1_pd(s));
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, double s, const Phanes::Core::Math::TVector2& v1)
- {
- r.comp = _mm_div_pd(_mm_set1_pd(s), v1.comp);
- }
- };
-
- template<>
- struct compute_vec2_inc
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1)
- {
- r.comp = _mm_add_pd(v1.comp, _mm_set1_pd(1.0f));
- }
- };
-
- template<>
- struct compute_vec2_dec
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1)
- {
- r.comp = _mm_sub_pd(v1.comp, _mm_set1_pd(1.0f));
- }
- };
-
- // Magnitude
- template
- struct compute_vec2_mag
- {
- static FORCEINLINE double map(const Phanes::Core::Math::TVector2& v1)
- {
- __m128d tmp = _mm_mul_pd(v1.data, v1.data);
- return _mm_cvtsd_f64(_mm_sqrt_pd(_mm_hadd_pd(tmp, tmp)));
- }
- };
-
- // Dot product
- template<>
- struct compute_vec2_dotp
- {
- static FORCEINLINE double map(const Phanes::Core::Math::TVector2& v1)
- {
- __m128d tmp = _mm_mul_pd(v1.data, v1.data);
- return _mm_cvtsd_f64(_mm_hadd_pd(tmp, tmp));
- }
- };
-
- // Max
- template<>
- struct compute_vec2_max
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
- {
- r.data = _mm_max_pd(v1.data, v2.data);
- }
- };
-
- // Min
- template<>
- struct compute_vec2_min
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
- {
- r.data = _mm_min_pd(v1.data, v2.data);
- }
- };
-
- // Set
- template<>
- struct compute_vec2_set
- {
- static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double x, double y)
- {
- v1.data = _mm_setr_pd(x, y);
- }
- };
-
-
- // ========= //
- // Plane //
- // ========= //
-
- template<>
- struct construct_plane
- {
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& pl, const TVector3& v1, float d)
- {
- pl.comp.data = v1.data;
- pl.comp.w = d;
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& pl, const TVector3& normal, const TVector3& base)
- {
- pl.comp.data = normal.data;
- pl.comp.w = DotP(normal, base);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& pl, float x, float y, float z, float d)
- {
-
- pl.comp.data = _mm_set_ps(x, y, z, d);
- }
-
- // TODO: Create SSE constructor with 3 Points
-
- };
-
- template<>
- struct compute_plane_add
- {
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& r, Phanes::Core::Math::TPlane& pl1, Phanes::Core::Math::TPlane& pl2)
- {
- r.comp.data = _mm_add_ps(pl1.comp.data, pl2.comp.data);
- }
- };
-
- template<>
- struct compute_plane_sub
- {
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& r, Phanes::Core::Math::TPlane& pl1, Phanes::Core::Math::TPlane& pl2)
- {
- r.comp.data = _mm_sub_ps(pl1.comp.data, pl2.comp.data);
- }
- };
-
- template<>
- struct compute_plane_mul
- {
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& r, Phanes::Core::Math::TPlane& pl1, Phanes::Core::Math::TPlane& pl2)
- {
- r.comp.data = _mm_mul_ps(pl1.comp.data, pl2.comp.data);
- }
- };
-
- template<>
- struct compute_plane_div
- {
- static FORCEINLINE void map(Phanes::Core::Math::TPlane& r, Phanes::Core::Math::TPlane& pl1, Phanes::Core::Math::TPlane& pl2)
- {
- r.comp.data = _mm_div_ps(pl1.comp.data, pl2.comp.data);
- }
- };
-
- // =============== //
- // TIntVector4 //
- // =============== //
-
- template<>
- struct construct_ivec4
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const TIntVector4& v2)
- {
- v1.comp = _mm_setr_epi32(v2.x, v2.y, v2.z, v2.w);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int s)
- {
- v1.comp = _mm_set1_epi32(s);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, int x, int y, int z, int w)
- {
- v1.comp = _mm_setr_epi32(x, y, z, w);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& v1, const int* comp)
- {
- v1.comp = _mm_set_epi32(comp[3], comp[2], comp[1], comp[0]);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector2& v1, const Phanes::Core::Math::TIntVector2& v2)
- {
- r.comp = _mm_setr_epi32(v1.x, v1.y, v2.x, v2.y);
- }
- };
-
- template<>
- struct compute_ivec4_add
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_add_epi32(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_add_epi32(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_sub
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_sub_epi32(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_sub_epi32(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_mul
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_mul_epi32(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_mul_epi32(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_inc
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1)
- {
- r.comp = _mm_add_epi32(v1.comp, _mm_set1_epi32(1));
- }
- };
-
- template<>
- struct compute_ivec4_dec
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1)
- {
- r.comp = _mm_sub_epi32(v1.comp, _mm_set1_epi32(1));
- }
- };
-
- template<>
- struct compute_ivec4_and
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_and_si128(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_and_si128(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_or
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_or_si128(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_or_si128(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_xor
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_xor_si128(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_xor_si128(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_left_shift
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_sll_epi32(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_sll_epi32(v1.comp, _mm_set1_epi32(s));
- }
- };
-
- template<>
- struct compute_ivec4_right_shift
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, const Phanes::Core::Math::TIntVector4& v2)
- {
- r.comp = _mm_srl_epi32(v1.comp, v2.comp);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector4& r, const Phanes::Core::Math::TIntVector4& v1, int s)
- {
- r.comp = _mm_srl_epi32(v1.comp, _mm_set1_epi32(s));
- }
- };
-
-
- // =============== //
- // TIntVector3 //
- // =============== //
-
-
- template<>
- struct construct_ivec3
- {
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, const TIntVector3& v2)
- {
- v1.comp = _mm_setr_epi32(v2.x, v2.y, v2.z, v2.w);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, int s)
- {
- v1.comp = _mm_set1_epi32(s);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, int x, int y, int z)
- {
- v1.comp = _mm_setr_epi32(x, y, z, 0);
- }
-
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& v1, const int* comp)
- {
- v1.comp = _mm_setr_epi32(comp[0], comp[1], comp[2], 0);
- }
-
- static FORCEINLINE void map(Phanes::Core::Math::TIntVector3& r, const Phanes::Core::Math::TIntVector2& v1, const int s)
- {
- r.comp = _mm_setr_epi32(v1.x, v1.y, s, 0);
- }
- };
-
-
- template<> struct compute_ivec3_add