diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
index cda1c1b..77f78e0 100644
--- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
@@ -102,6 +102,16 @@ namespace Phanes::Core::Math::SIMD
{
return _mm_cmpeq_pd(v1, v2);
}
+
+ /// <summary>
+ /// Sets the last (fourth) lane of a four-float register to zero, in place.
+ /// A three-component vector kept in a four-lane register could hold an
+ /// unexpected value in that lane.
+ /// </summary>
+ /// <param name="v1">Register to fix. Taken by reference so the caller sees the result.</param>
+ inline void vec3_fix(Phanes::Core::Types::Vec4f32Reg& v1)
+ {
+     // Bit i of the blend mask picks lane i from the second operand. Vectors in this
+     // file are packed with _mm_setr_ps(x, y, z, w), so the last component is lane 3
+     // and the mask must be 0x8; 0x1 would clear x instead.
+     v1 = _mm_blend_ps(v1, _mm_setzero_ps(), 0x8);
+ }
}
@@ -175,6 +185,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_sub_ps(v1.comp, _mm_set_ps1(s));
}
+
+ /// <summary>
+ /// Scalar-minus-vector: every lane of r receives s minus the matching lane of v1.
+ /// </summary>
+ /// <param name="r">Receives the result.</param>
+ /// <param name="s">Scalar on the left-hand side of the subtraction.</param>
+ /// <param name="v1">Vector on the right-hand side of the subtraction.</param>
+ static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, float s, const Phanes::Core::Math::TVector4& v1)
+ {
+     // Broadcast the scalar to all four lanes, then subtract lane by lane.
+     const __m128 broadcast = _mm_set_ps1(s);
+     r.comp = _mm_sub_ps(broadcast, v1.comp);
+ }
};
template<>
@@ -203,6 +218,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_div_ps(v1.comp, _mm_set_ps1(s));
}
+
+ /// <summary>
+ /// Scalar-over-vector: every lane of r receives s divided by the matching lane of v1.
+ /// </summary>
+ /// <param name="r">Receives the result.</param>
+ /// <param name="s">Scalar numerator.</param>
+ /// <param name="v1">Vector of denominators.</param>
+ static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, float s, const Phanes::Core::Math::TVector4& v1)
+ {
+     // Broadcast the scalar to all four lanes, then divide lane by lane.
+     const __m128 numerator = _mm_set_ps1(s);
+     r.comp = _mm_div_ps(numerator, v1.comp);
+ }
};
template<>
@@ -223,6 +243,62 @@ namespace Phanes::Core::Math::Detail
}
};
+ template<>
+ struct compute_vec4_mag
+ {
+     /// <summary>
+     /// Computes the magnitude (Euclidean length) of a four-component vector.
+     /// </summary>
+     /// <param name="v1">Vector whose length is computed.</param>
+     /// <returns>Square root of the dot product of v1 with itself.</returns>
+     static FORCEINLINE float map(const Phanes::Core::Math::TVector4& v1)
+     {
+         // |v| = sqrt(v . v). Going through the same helper the dot product uses avoids
+         // reading lanes via m128_f32, a union member only MSVC's __m128 provides.
+         return sqrt(SIMD::vec4_dot_cvtf32(v1.data, v1.data));
+     }
+ };
+
+ template<>
+ struct compute_vec4_dotp
+ {
+     /// <summary>
+     /// Dot product of two four-component vectors, delegated to SIMD::vec4_dot_cvtf32.
+     /// </summary>
+     /// <param name="v1">First operand.</param>
+     /// <param name="v2">Second operand.</param>
+     /// <returns>The value produced by SIMD::vec4_dot_cvtf32 for both registers, as a float.</returns>
+     static FORCEINLINE float map(const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+     {
+         const float dot = SIMD::vec4_dot_cvtf32(v1.data, v2.data);
+         return dot;
+     }
+ };
+
+ template<>
+ struct compute_vec4_set
+ {
+     /// <summary>
+     /// Overwrites all four components of a vector.
+     /// </summary>
+     /// <param name="v1">Vector to overwrite.</param>
+     /// <param name="x">Value for lane 0.</param>
+     /// <param name="y">Value for lane 1.</param>
+     /// <param name="z">Value for lane 2.</param>
+     /// <param name="w">Value for lane 3.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector4& v1, float x, float y, float z, float w)
+     {
+         // _mm_setr_ps takes its arguments lowest lane first, so x lands in lane 0.
+         const __m128 packed = _mm_setr_ps(x, y, z, w);
+         v1.data = packed;
+     }
+ };
+
+ template<>
+ struct compute_vec4_max
+ {
+     /// <summary>
+     /// Component-wise maximum of two vectors.
+     /// </summary>
+     /// <param name="r">Receives, per lane, the result of _mm_max_ps on v1 and v2.</param>
+     /// <param name="v1">First operand.</param>
+     /// <param name="v2">Second operand.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+     {
+         const __m128 larger = _mm_max_ps(v1.data, v2.data);
+         r.data = larger;
+     }
+ };
+
+ template<>
+ struct compute_vec4_min
+ {
+     /// <summary>
+     /// Component-wise minimum of two vectors.
+     /// </summary>
+     /// <param name="r">Receives, per lane, the result of _mm_min_ps on v1 and v2.</param>
+     /// <param name="v1">First operand.</param>
+     /// <param name="v2">Second operand.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1, const Phanes::Core::Math::TVector4& v2)
+     {
+         const __m128 smaller = _mm_min_ps(v1.data, v2.data);
+         r.data = smaller;
+     }
+ };
+
+ template<>
+ struct compute_vec4_pdiv
+ {
+     /// <summary>
+     /// Divides every component of v1 by its w component and stores the quotient in r
+     /// with the w lane cleared to zero.
+     /// </summary>
+     /// <param name="r">Receives (x/w, y/w, z/w, 0).</param>
+     /// <param name="v1">Vector to divide; its w component is the divisor.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector4& r, const Phanes::Core::Math::TVector4& v1)
+     {
+         const __m128 quotient = _mm_div_ps(v1.data, _mm_set_ps1(v1.w));
+
+         // Bit i of the blend mask takes lane i from the zero register. Components are
+         // packed x, y, z, w from lane 0 upward, so clearing w needs bit 3 (0x8); a mask
+         // of 0x1 would discard the divided x component instead.
+         // NOTE(review): assumes the intent is to zero w, not x - confirm.
+         r.data = _mm_blend_ps(quotient, _mm_setzero_ps(), 0x8);
+     }
+ };
+
// ============ //
// TVector3 //
@@ -260,6 +336,14 @@ namespace Phanes::Core::Math::Detail
}
};
+ template<>
+ struct compute_vec3_set
+ {
+     /// <summary>
+     /// Overwrites the three components of a vector and writes zero to the fourth lane.
+     /// </summary>
+     /// <param name="v1">Vector to overwrite.</param>
+     /// <param name="x">Value for lane 0.</param>
+     /// <param name="y">Value for lane 1.</param>
+     /// <param name="z">Value for lane 2.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector3& v1, float x, float y, float z)
+     {
+         // Lane 3 has no component of its own here; it is explicitly set to 0.0f.
+         const __m128 packed = _mm_setr_ps(x, y, z, 0.0f);
+         v1.data = packed;
+     }
+ };
+ // TVector3 addition and subtraction inherit the TVector4 implementations unchanged.
template<> struct compute_vec3_add : public compute_vec4_add {};
template<> struct compute_vec3_sub : public compute_vec4_sub {};
@@ -267,6 +351,10 @@ namespace Phanes::Core::Math::Detail
+ // The TVector3 variants below inherit the TVector4 implementations unchanged.
template<> struct compute_vec3_div : public compute_vec4_div {};
template<> struct compute_vec3_inc : public compute_vec4_inc {};
template<> struct compute_vec3_dec : public compute_vec4_dec {};
+ // NOTE(review): the inherited magnitude reduces over the whole four-lane register
+ // (presumably the dot product does too), so the result includes whatever the fourth
+ // lane of a TVector3 holds - confirm callers keep that lane at zero.
+ template<> struct compute_vec3_mag : public compute_vec4_mag {};
+ template<> struct compute_vec3_dotp : public compute_vec4_dotp {};
+ template<> struct compute_vec3_max : public compute_vec4_max {};
+ template<> struct compute_vec3_min : public compute_vec4_min {};
template<>
struct compute_vec3_cross_p
@@ -336,6 +424,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_sub_pd(v1.comp, _mm_set1_pd(s));
}
+
+ /// <summary>
+ /// Scalar-minus-vector: both lanes of r receive s minus the matching lane of v1.
+ /// </summary>
+ /// <param name="r">Receives the result.</param>
+ /// <param name="s">Scalar on the left-hand side of the subtraction.</param>
+ /// <param name="v1">Vector on the right-hand side of the subtraction.</param>
+ static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, double s, const Phanes::Core::Math::TVector2& v1)
+ {
+     // Broadcast the scalar to both lanes, then subtract lane by lane.
+     const __m128d broadcast = _mm_set1_pd(s);
+     r.comp = _mm_sub_pd(broadcast, v1.comp);
+ }
};
template<>
@@ -364,6 +457,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_div_pd(v1.comp, _mm_set1_pd(s));
}
+
+ /// <summary>
+ /// Scalar-over-vector: both lanes of r receive s divided by the matching lane of v1.
+ /// </summary>
+ /// <param name="r">Receives the result.</param>
+ /// <param name="s">Scalar numerator.</param>
+ /// <param name="v1">Vector of denominators.</param>
+ static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, double s, const Phanes::Core::Math::TVector2& v1)
+ {
+     // Broadcast the scalar to both lanes, then divide lane by lane.
+     const __m128d numerator = _mm_set1_pd(s);
+     r.comp = _mm_div_pd(numerator, v1.comp);
+ }
};
template<>
@@ -384,6 +482,58 @@ namespace Phanes::Core::Math::Detail
}
};
+ // Magnitude
+ template<>
+ struct compute_vec2_mag
+ {
+     /// <summary>
+     /// Computes the magnitude (Euclidean length) of a two-component vector.
+     /// </summary>
+     /// <param name="v1">Vector whose length is computed.</param>
+     /// <returns>Square root of the sum of the squared components.</returns>
+     static FORCEINLINE double map(const Phanes::Core::Math::TVector2& v1)
+     {
+         const __m128d squared = _mm_mul_pd(v1.data, v1.data);
+
+         // Copy the upper lane into the lower one so both squares can be read with
+         // _mm_cvtsd_f64; the m128d_f64 union member only exists on MSVC.
+         const __m128d upper = _mm_unpackhi_pd(squared, squared);
+         return sqrt(_mm_cvtsd_f64(squared) + _mm_cvtsd_f64(upper));
+     }
+ };
+
+ // Dot product
+ template<>
+ struct compute_vec2_dotp
+ {
+     /// <summary>
+     /// Dot product of two two-component vectors.
+     /// </summary>
+     /// <param name="v1">First operand.</param>
+     /// <param name="v2">Second operand.</param>
+     /// <returns>v1.x * v2.x + v1.y * v2.y.</returns>
+     static FORCEINLINE double map(const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
+     {
+         const __m128d products = _mm_mul_pd(v1.data, v2.data);
+
+         // Copy the upper lane into the lower one so both products can be read with
+         // _mm_cvtsd_f64; the m128d_f64 union member only exists on MSVC.
+         const __m128d upper = _mm_unpackhi_pd(products, products);
+         return _mm_cvtsd_f64(products) + _mm_cvtsd_f64(upper);
+     }
+
+     /// <summary>
+     /// Dot product of a vector with itself (its squared length). Retained so code written
+     /// against the one-argument form keeps compiling.
+     /// </summary>
+     /// <param name="v1">Vector to multiply with itself.</param>
+     /// <returns>v1.x * v1.x + v1.y * v1.y.</returns>
+     static FORCEINLINE double map(const Phanes::Core::Math::TVector2& v1)
+     {
+         return map(v1, v1);
+     }
+ };
+
+ // Max
+ template<>
+ struct compute_vec2_max
+ {
+     /// <summary>
+     /// Component-wise maximum of two vectors.
+     /// </summary>
+     /// <param name="r">Receives, per lane, the result of _mm_max_pd on v1 and v2.</param>
+     /// <param name="v1">First operand.</param>
+     /// <param name="v2">Second operand.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
+     {
+         const __m128d larger = _mm_max_pd(v1.data, v2.data);
+         r.data = larger;
+     }
+ };
+
+ // Min
+ template<>
+ struct compute_vec2_min
+ {
+     /// <summary>
+     /// Component-wise minimum of two vectors.
+     /// </summary>
+     /// <param name="r">Receives, per lane, the result of _mm_min_pd on v1 and v2.</param>
+     /// <param name="v1">First operand.</param>
+     /// <param name="v2">Second operand.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector2& r, const Phanes::Core::Math::TVector2& v1, const Phanes::Core::Math::TVector2& v2)
+     {
+         const __m128d smaller = _mm_min_pd(v1.data, v2.data);
+         r.data = smaller;
+     }
+ };
+
+ // Set
+ template<>
+ struct compute_vec2_set
+ {
+     /// <summary>
+     /// Overwrites both components of a vector.
+     /// </summary>
+     /// <param name="v1">Vector to overwrite.</param>
+     /// <param name="x">Value for lane 0.</param>
+     /// <param name="y">Value for lane 1.</param>
+     static FORCEINLINE void map(Phanes::Core::Math::TVector2& v1, double x, double y)
+     {
+         // _mm_setr_pd takes its arguments lowest lane first, so x lands in lane 0.
+         const __m128d packed = _mm_setr_pd(x, y);
+         v1.data = packed;
+     }
+ };
+
// =============== //
// TIntVector4 //