Add Matrix4 and Matrix4 SIMD.
This commit is contained in:
parent
ed44c3695c
commit
5861d75bdb
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/Boilerplate.h"
|
||||
#include "Core/public/Math/MathCommon.hpp"
|
||||
|
||||
namespace Phanes::Core::Math::Detail
|
||||
{
|
||||
template<RealType T, bool S>
|
||||
struct compute_mat3_transpose {};
|
||||
|
||||
template<RealType T>
|
||||
struct compute_mat3_transpose<T, false>
|
||||
{
|
||||
static constexpr void map(Phanes::Core::Math::TMatrix3<T, false>& r, const TMatrix3<T, false>& m1)
|
||||
{
|
||||
r = TMatrix4<T, false>(m1(0, 0), m1(1, 0), m1(2, 0),
|
||||
m1(0, 1), m1(1, 1), m1(2, 1),
|
||||
m1(0, 2), m1(1, 2), m1(2, 2)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
}
|
@ -0,0 +1,98 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/Boilerplate.h"
|
||||
#include "Core/public/Math/MathCommon.hpp"
|
||||
|
||||
namespace Phanes::Core::Math::Detail
|
||||
{
|
||||
template<RealType T, bool S>
|
||||
struct compute_mat4_det {};
|
||||
|
||||
template<RealType T, bool S>
|
||||
struct compute_mat4_inv {};
|
||||
|
||||
template<RealType T, bool S>
|
||||
struct compute_mat4_transpose {};
|
||||
|
||||
|
||||
template<RealType T>
|
||||
struct compute_mat4_det<T, false>
|
||||
{
|
||||
static constexpr T map(Phanes::Core::Math::TMatrix4<T, S>& m)
|
||||
{
|
||||
const TVector3<T, false>& a = reinterpret_cast<TVector3<T, false>&>(m[0]);
|
||||
const TVector3<T, false>& b = reinterpret_cast<TVector3<T, false>&>(m[1]);
|
||||
const TVector3<T, false>& c = reinterpret_cast<TVector3<T, false>&>(m[2]);
|
||||
const TVector3<T, false>& d = reinterpret_cast<TVector3<T, false>&>(m[3]);
|
||||
|
||||
const float& x = m(3, 0);
|
||||
const float& y = m(3, 1);
|
||||
const float& z = m(3, 2);
|
||||
const float& w = m(3, 3);
|
||||
|
||||
TVector3<T, false> s = CrossP(a, b);
|
||||
TVector3<T, false> t = CrossP(c, d);
|
||||
TVector3<T, false> u = a * y - b * x;
|
||||
TVector3<T, false> v = c * w - d * z;
|
||||
return DotP(s, v) + DotP(t, u);
|
||||
}
|
||||
};
|
||||
|
||||
template<RealType T>
|
||||
struct compute_mat4_inv<T, false>
|
||||
{
|
||||
static constexpr bool map(Phanes::Core::Math::TMatrix4<T, false>& r, const Phanes::Core::Math::TMatrix4<T, false>& m)
|
||||
{
|
||||
const TVector3<T, false>& a = reinterpret_cast<TVector3<T, false>&>(m[0]);
|
||||
const TVector3<T, false>& b = reinterpret_cast<TVector3<T, false>&>(m[1]);
|
||||
const TVector3<T, false>& c = reinterpret_cast<TVector3<T, false>&>(m[2]);
|
||||
const TVector3<T, false>& d = reinterpret_cast<TVector3<T, false>&>(m[3]);
|
||||
|
||||
const float& x = m(3, 0);
|
||||
const float& y = m(3, 1);
|
||||
const float& z = m(3, 2);
|
||||
const float& w = m(3, 3);
|
||||
|
||||
TVector3<T, false> s = CrossP(a, b);
|
||||
TVector3<T, false> t = CrossP(c, d);
|
||||
TVector3<T, false> u = a * y - b * x;
|
||||
TVector3<T, false> v = c * w - d * z;
|
||||
|
||||
float _1_det = (T)1.0 / (DotP(s, v) + DotP(t, u));
|
||||
|
||||
if (_1_det == 0.0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
s *= _1_det;
|
||||
t *= _1_det;
|
||||
u *= _1_det;
|
||||
v *= _1_det;
|
||||
|
||||
TVector3<T, false> r0 = Cross(b, v) + t * y;
|
||||
TVector3<T, false> r1 = Cross(v, a) + t * x;
|
||||
TVector3<T, false> r2 = Cross(d, u) + s * w;
|
||||
TVector3<T, false> r3 = Cross(u, c) + s * z;
|
||||
|
||||
r = TMatrix4<T, false>(r0.x, r0.y, r0.z, -DotP(b, t),
|
||||
r1.x, r1.y, r1.z, DotP(a, t),
|
||||
r2.x, r2.y, r2.z, -DotP(d, s),
|
||||
r3.x, r3.y, r3.z, DotP(c, s));
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template<RealType T>
|
||||
struct compute_mat4_transpose<T, false>
|
||||
{
|
||||
static constexpr void map(Phanes::Core::Math::TMatrix4<T, S>& r, const Phanes::Core::Math::TMatrix4<T, S>& m)
|
||||
{
|
||||
r = Phanes::Core::Math::TMatrix4<T, false>(m(0, 0), m(1, 0), m(2, 0), m(3, 0),
|
||||
m(0, 1), m(1, 1), m(2, 1), m(3, 1),
|
||||
m(0, 2), m(1, 2), m(2, 2), m(3, 2),
|
||||
m(0, 3), m(1, 3), m(2, 3), m(3, 3));
|
||||
}
|
||||
};
|
||||
}
|
@ -38,10 +38,7 @@ namespace Phanes::Core::Math::Detail
|
||||
{
|
||||
/**
 * Copies all four components of v2 into v1.
 *
 * The components are copied one by one; the additional memcpy of the same
 * data was redundant and cannot be evaluated in a constant expression.
 */
static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, const TVector4<T, false>& v2)
{
    v1.x = v2.x;
    v1.y = v2.y;
    v1.z = v2.z;
    v1.w = v2.w;
}
|
||||
|
||||
|
||||
@ -72,10 +69,7 @@ namespace Phanes::Core::Math::Detail
|
||||
|
||||
/**
 * Initializes v1 from the first four values of comp.
 *
 * @param v1   Vector to fill.
 * @param comp Array holding at least four values; read at indices 0..3.
 *
 * The components are copied one by one; the additional memcpy of the same
 * data was redundant and cannot be evaluated in a constant expression.
 */
static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, const T* comp)
{
    v1.x = comp[0];
    v1.y = comp[1];
    v1.z = comp[2];
    v1.w = comp[3];
}
|
||||
};
|
||||
|
||||
|
@ -29,7 +29,6 @@ namespace Phanes::Core::Math {
|
||||
template<RealType T> struct TRay;
template<RealType T> struct TLine;
template<RealType T> struct TPlane;
template<RealType T> struct TQuaternion;
template<RealType T> struct TTransform;
template<RealType T> struct TPoint2;
|
||||
@ -40,6 +39,7 @@ namespace Phanes::Core::Math {
|
||||
template<IntType T> struct TIntPoint4;
|
||||
template<RealType T> struct TMatrix2;
|
||||
template<RealType T, bool S> struct TMatrix3;
|
||||
template<RealType T, bool S> struct TMatrix4;
|
||||
template<RealType T, bool S> struct TVector2;
|
||||
template<RealType T, bool S> struct TVector3;
|
||||
template<RealType T, bool S> struct TVector4;
|
||||
|
@ -279,7 +279,7 @@ namespace Phanes::Core::Math {
|
||||
template<RealType T>
|
||||
TMatrix2<T> TransposeV(TMatrix2<T>& m1)
|
||||
{
|
||||
Swap(m1(0, 1), m1(1, 0));
|
||||
Swap(m1(0, 1), m1(1, 0));
|
||||
}
|
||||
|
||||
// =============== //
|
||||
@ -299,7 +299,7 @@ namespace Phanes::Core::Math {
|
||||
TMatrix2<T> Transpose(const TMatrix2<T>& m1)
|
||||
{
|
||||
return TMatrix2<T>(m1(0, 0), m1(1, 0),
|
||||
m1(0, 1), m1(1, 1));
|
||||
m1(0, 1), m1(1, 1));
|
||||
}
|
||||
|
||||
template<RealType T>
|
||||
@ -312,4 +312,7 @@ namespace Phanes::Core::Math {
|
||||
} // Phanes::Core::Math
|
||||
|
||||
|
||||
#endif // !MATRIX2_H
|
||||
#endif // !MATRIX2_H
|
||||
|
||||
|
||||
#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
|
@ -111,17 +111,17 @@ namespace Phanes::Core::Math {
|
||||
|
||||
/**
 * Element access.
 *
 * @param n Second index into the storage array.
 * @param m First index into the storage array.
 * @return Mutable reference to data[m][n].
 */
FORCEINLINE T& operator() (int n, int m)
{
    return this->data[m][n];
}
|
||||
|
||||
/**
 * Access to one stored vector of the matrix.
 *
 * @param m Index into the first dimension of the storage array.
 * @return data[m] reinterpreted as a TVector3<T, S>.
 */
FORCEINLINE TVector3<T, S>& operator[] (int m)
{
    // Index the storage array; `this->m` would name the int parameter, not a member.
    return (*reinterpret_cast<TVector3<T, S>*>(this->data[m]));
}
|
||||
|
||||
/**
 * Read-only element access.
 *
 * @param n Second index into the storage array.
 * @param m First index into the storage array.
 * @return Const reference to data[m][n].
 */
FORCEINLINE const T& operator() (int n, int m) const
{
    return this->data[m][n];
}
|
||||
|
||||
FORCEINLINE const TVector3<T, S>& operator[] (int m) const
|
||||
@ -249,8 +249,8 @@ namespace Phanes::Core::Math {
|
||||
TMatrix3<T, S> operator+ (const TMatrix3<T, S>& m, T s)
|
||||
{
|
||||
return TMatrix3<T, S>(m.c0 + s,
|
||||
m.c1 + s,
|
||||
m.c2 + s);
|
||||
m.c1 + s,
|
||||
m.c2 + s);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -384,25 +384,26 @@ namespace Phanes::Core::Math {
|
||||
*/
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix3<T, S> InverseV(TMatrix3<T, S>& m1)
|
||||
bool InverseV(TMatrix3<T, S>& m1)
|
||||
{
|
||||
const TVector3<T, S>& v0 = m1[0];
|
||||
const TVector3<T, S>& v1 = m1[1];
|
||||
const TVector3<T, S>& v2 = m1[2];
|
||||
|
||||
TVector3<T, S> r0 = CrossP(v1, v2);
|
||||
TVector3<T, S> r1 = CrossP(v2, v0);
|
||||
TVector3<T, S> r2 = CrossP(v0, v1);
|
||||
TVector3<T, S> r0 = CrossP(m1.c1, m1.c2);
|
||||
TVector3<T, S> r1 = CrossP(m1.c2, m1.c0);
|
||||
TVector3<T, S> r2 = CrossP(m1.c0, m1.c1);
|
||||
|
||||
T _1_det = (T)1.0 / Determinant(m1);
|
||||
|
||||
if (_1_det == (T)0.0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m1 = TMatrix3<T, S>(r0.x, r0.y, r0.z,
|
||||
r1.x, r1.y, r1.z,
|
||||
r2.x, r2.y, r2.z);
|
||||
|
||||
m1 *= _1_det;
|
||||
|
||||
return m1;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -414,14 +415,7 @@ namespace Phanes::Core::Math {
|
||||
*/
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix3<T, S> TransposeV(TMatrix3<T, S>& m1)
|
||||
{
|
||||
Swap(m1(0, 1), m1(1, 0));
|
||||
Swap(m1(0, 2), m1(2, 0));
|
||||
Swap(m1(1, 2), m1(2, 1));
|
||||
|
||||
return m1;
|
||||
}
|
||||
TMatrix3<T, S> TransposeV(TMatrix3<T, S>& m1);
|
||||
|
||||
|
||||
// =============== //
|
||||
@ -435,25 +429,26 @@ namespace Phanes::Core::Math {
|
||||
*/
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix3<T, S> Inverse(TMatrix3<T, S>& m1)
|
||||
bool Inverse(TMatrix3<T, S>& r, const TMatrix3<T, S>& m1)
|
||||
{
|
||||
const TVector3<T>& v0 = m1[0];
|
||||
const TVector3<T>& v1 = m1[1];
|
||||
const TVector3<T>& v2 = m1[2];
|
||||
|
||||
TVector3<T> r0 = CrossP(v1, v2);
|
||||
TVector3<T> r1 = CrossP(v2, v0);
|
||||
TVector3<T> r2 = CrossP(v0, v1);
|
||||
TVector3<T, S> r0 = CrossP(m1.c1, m1.c2);
|
||||
TVector3<T, S> r1 = CrossP(m1.c2, m1.c0);
|
||||
TVector3<T, S> r2 = CrossP(m1.c0, m1.c1);
|
||||
|
||||
T _1_det = (T)1.0 / Determinant(m1);
|
||||
|
||||
TMatrix3<T, S> inverse(r0.x, r0.y, r0.z,
|
||||
if (_1_det == (T)0.0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
r = TMatrix3<T, S>(r0.x, r0.y, r0.z,
|
||||
r1.x, r1.y, r1.z,
|
||||
r2.x, r2.y, r2.z);
|
||||
|
||||
inverse *= _1_det;
|
||||
r *= _1_det;
|
||||
|
||||
return inverse;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -465,12 +460,7 @@ namespace Phanes::Core::Math {
|
||||
*/
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix3<T, S> Transpose(const TMatrix3<T, S>& m1)
|
||||
{
|
||||
return TMatrix3<T, S>(m1(0, 0), m1(1, 0), m1(2, 0),
|
||||
m1(0, 1), m1(1, 1), m1(2, 1),
|
||||
m1(0, 2), m1(1, 2), m1(2, 2));
|
||||
}
|
||||
TMatrix3<T, S> Transpose(const TMatrix3<T, S>& m1);
|
||||
|
||||
/**
|
||||
* Checks if matrix is an identity matrix.
|
||||
@ -488,3 +478,5 @@ namespace Phanes::Core::Math {
|
||||
|
||||
|
||||
#endif // !MATRIX3_H
|
||||
|
||||
#include "Core/public/Math/Matrix3.inl"
|
27
Engine/Source/Runtime/Core/public/Math/Matrix3.inl
Normal file
27
Engine/Source/Runtime/Core/public/Math/Matrix3.inl
Normal file
@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/Boilerplate.h"
|
||||
|
||||
#include "Core/public/Math/Detail/Matrix3Decl.inl"
|
||||
#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
|
||||
|
||||
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
|
||||
|
||||
namespace Phanes::Core::Math
|
||||
{
|
||||
template<RealType T, bool S>
|
||||
TMatrix3<T, S> TransposeV(const TMatrix3<T, S>& m)
|
||||
{
|
||||
Detail::compute_mat3_transpose<T, S>::map(m, m);
|
||||
return m;
|
||||
}
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix3<T, S> Transpose(const TMatrix3<T, S>& m)
|
||||
{
|
||||
TMatrix3<T, S> r;
|
||||
Detail::compute_mat3_transpose<T, S>::map(r, m);
|
||||
return r;
|
||||
|
||||
}
|
||||
}
|
144
Engine/Source/Runtime/Core/public/Math/Matrix4.hpp
Normal file
144
Engine/Source/Runtime/Core/public/Math/Matrix4.hpp
Normal file
@ -0,0 +1,144 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/Boilerplate.h"
|
||||
|
||||
#include "Core/public/Math/MathAbstractTypes.h"
|
||||
#include "Core/public/Math/MathFwd.h"
|
||||
#include "Core/public/Math/Vector4.hpp"
|
||||
|
||||
#ifndef MATRIX4_H
|
||||
#define MATRIX4_H
|
||||
|
||||
namespace Phanes::Core::Math {
|
||||
|
||||
// 4x4 Matrix defined in column-major order.
|
||||
|
||||
template<RealType T, bool S>
|
||||
struct TMatrix4
|
||||
{
|
||||
public:
|
||||
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
TVector4<T, S> c0;
|
||||
TVector4<T, S> c1;
|
||||
TVector4<T, S> c2;
|
||||
TVector4<T, S> c3;
|
||||
};
|
||||
|
||||
T data[4][4];
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
FORCEINLINE T& operator() (int n, int m)
|
||||
{
|
||||
return this->data[m][n];
|
||||
}
|
||||
FORCEINLINE TVector4<T, S>& operator[] (int m)
|
||||
{
|
||||
return (*reinterpret_cast<TVector4<T, S>*>(this->m[m]));
|
||||
}
|
||||
|
||||
FORCEINLINE const T& operator() (int n, int m) const
|
||||
{
|
||||
return this->data[m][n];
|
||||
}
|
||||
FORCEINLINE const TVector4<T, S>& operator[] (int m) const
|
||||
{
|
||||
return (*reinterpret_cast<TVector4<T, S>*>(this->m[m]));
|
||||
}
|
||||
};
|
||||
|
||||
// ==================== //
|
||||
// Matrix4 operator //
|
||||
// ==================== //
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator+= (TMatrix4<T, S>& a, T s);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator+= (TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator-= (TMatrix4<T, S>& a, T s);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator-= (TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator*= (TMatrix4<T, S>& a, T s);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator*= (TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator+ (const TMatrix4<T, S>& a, T s);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator+ (const TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator- (const TMatrix4<T, S>& a, T s);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator- (const TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator* (const TMatrix4<T, S>& a, T s);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> operator* (const TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TVector4<T, S> operator* (const TMatrix4<T, S>& a, const TVector4<T, S>& v);
|
||||
|
||||
template<RealType T, bool S>
|
||||
bool operator== (const TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
template<RealType T, bool S>
|
||||
bool operator!= (const TMatrix4<T, S>& a, const TMatrix4<T, S>& b);
|
||||
|
||||
|
||||
// ================================ //
|
||||
// Matrix4 function definition //
|
||||
// ================================ //
|
||||
|
||||
template<RealType T, bool S>
|
||||
T Determinant(const TMatrix4<T, S>& m);
|
||||
|
||||
template<RealType T, bool S>
|
||||
bool InverseV(TMatrix4<T, S>& a);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> TransposeV(TMatrix4<T, S>& a);
|
||||
|
||||
// =============== //
|
||||
// WITH RETURN //
|
||||
// =============== //
|
||||
|
||||
|
||||
template<RealType T, bool S>
|
||||
bool Inverse(TMatrix4<T, S>& a);
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> Transpose(const TMatrix4<T, S>& a);
|
||||
|
||||
template<RealType T, bool S>
|
||||
FORCEINLINE bool IsIndentityMatrix(const TMatrix4<T, S>& a)
|
||||
{
|
||||
return (abs(m1(0, 0) - (T)1.0) < P_FLT_INAC && abs(m1(0, 1) - (T)0.0) < P_FLT_INAC && abs(m1(0, 2) - (T)0.0) < P_FLT_INAC && abs(m1(0, 3) - (T)0.0) < P_FLT_INAC &&
|
||||
abs(m1(1, 0) - (T)0.0) < P_FLT_INAC && abs(m1(1, 1) - (T)1.0) < P_FLT_INAC && abs(m1(1, 2) - (T)0.0) < P_FLT_INAC && abs(m1(1, 3) - (T)0.0) < P_FLT_INAC &&
|
||||
abs(m1(2, 0) - (T)0.0) < P_FLT_INAC && abs(m1(2, 1) - (T)0.0) < P_FLT_INAC && abs(m1(2, 2) - (T)1.0) < P_FLT_INAC && abs(m1(2, 3) - (T)0.0) < P_FLT_INAC &&
|
||||
abs(m1(3, 0) - (T)0.0) < P_FLT_INAC && abs(m1(3, 1) - (T)0.0) < P_FLT_INAC && abs(m1(3, 2) - (T)1.0) < P_FLT_INAC && abs(m1(3, 3) - (T)0.0) < P_FLT_INAC);
|
||||
}
|
||||
|
||||
|
||||
} // Phanes::Core::Math
|
||||
|
||||
|
||||
#endif // !MATRIX4_H
|
||||
|
||||
#include "Core/public/Math/Matrix4.inl"
|
46
Engine/Source/Runtime/Core/public/Math/Matrix4.inl
Normal file
46
Engine/Source/Runtime/Core/public/Math/Matrix4.inl
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/Boilerplate.h"
|
||||
|
||||
#include "Core/public/Math/Detail/Matrix4Decl.inl"
|
||||
#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
|
||||
|
||||
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
|
||||
|
||||
|
||||
namespace Phanes::Core::Math
|
||||
{
|
||||
template<RealType T, bool S>
|
||||
T Determinant(const TMatrix4<T, S>& m)
|
||||
{
|
||||
return Detail::compute_mat4_det<T, S>::map(m);
|
||||
}
|
||||
|
||||
template<RealType T, bool S>
|
||||
bool InverseV(TMatrix4<T, S>& a)
|
||||
{
|
||||
return Detail::compute_mat4_inv<T, S>::map(a, a);
|
||||
}
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> TransposeV(TMatrix4<T, S>& a)
|
||||
{
|
||||
return Detail::compute_mat4_transpose<T, S>::map(a, a);
|
||||
}
|
||||
|
||||
template<RealType T, bool S>
|
||||
bool Inverse(TMatrix4<T, S>& a)
|
||||
{
|
||||
TMatrix4<T, S> r;
|
||||
return Detail::compute_mat4_inv<T, S>::map(r, a);
|
||||
return r;
|
||||
}
|
||||
|
||||
template<RealType T, bool S>
|
||||
TMatrix4<T, S> Transpose(TMatrix4<T, S>& a)
|
||||
{
|
||||
TMatrix4<T, S> r;
|
||||
return Detail::compute_mat4_transpose<T, S>::map(r, a);
|
||||
return r;
|
||||
}
|
||||
}
|
@ -15,6 +15,9 @@
|
||||
#include "Core/public/Math/IntVector3.hpp"
|
||||
#include "Core/public/Math/IntVector4.hpp"
|
||||
|
||||
#include "Core/public/Math/Matrix3.hpp"
|
||||
#include "Core/public/Math/Matrix4.hpp"
|
||||
|
||||
|
||||
// ========== //
|
||||
// Common //
|
||||
@ -739,4 +742,476 @@ namespace Phanes::Core::Math::Detail
|
||||
r.comp = _mm_srl_epi64(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// =========== //
|
||||
// Matrix3 //
|
||||
// =========== //
|
||||
|
||||
template<>
|
||||
struct compute_mat3_transpose<float, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TMatrix3<float, true>& r, const TMatrix3<float, true>& m1)
|
||||
{
|
||||
__m128 tmp0 = _mm_shuffle_ps(m1.c0.data, m1.c1.data, 0x44);
|
||||
__m128 tmp2 = _mm_shuffle_ps(m1.c0.data, m1.c1.data, 0xEE);
|
||||
__m128 tmp1 = _mm_shuffle_ps(m1.c2.data, m1.c2.data, 0x44);
|
||||
__m128 tmp3 = _mm_shuffle_ps(m1.c2.data, m1.c2.data, 0xEE);
|
||||
|
||||
r.c0.data = _mm_shuffle_ps(tmp0, tmp1, 0x88);
|
||||
r.c1.data = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
|
||||
r.c2.data = _mm_shuffle_ps(tmp2, tmp3, 0x88);
|
||||
}
|
||||
};
|
||||
|
||||
// =========== //
|
||||
// Matrix4 //
|
||||
// =========== //
|
||||
|
||||
template<>
|
||||
struct compute_mat4_det<float, true>
|
||||
{
|
||||
|
||||
// From: GLM: https://github.com/g-truc/glm/blob/master/glm/simd/matrix.h (MIT License)
|
||||
static FORCEINLINE float map(const TMatrix4<float, true>& m1)
|
||||
{
|
||||
__m128 Fac0;
|
||||
{
|
||||
// valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
|
||||
// valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
|
||||
// valType SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];
|
||||
// valType SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac0 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac1;
|
||||
{
|
||||
// valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
|
||||
// valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
|
||||
// valType SubFactor07 = m[1][1] * m[3][3] - m[3][1] * m[1][3];
|
||||
// valType SubFactor14 = m[1][1] * m[2][3] - m[2][1] * m[1][3];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac1 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
|
||||
__m128 Fac2;
|
||||
{
|
||||
// valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
|
||||
// valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
|
||||
// valType SubFactor08 = m[1][1] * m[3][2] - m[3][1] * m[1][2];
|
||||
// valType SubFactor15 = m[1][1] * m[2][2] - m[2][1] * m[1][2];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac2 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac3;
|
||||
{
|
||||
// valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
|
||||
// valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
|
||||
// valType SubFactor09 = m[1][0] * m[3][3] - m[3][0] * m[1][3];
|
||||
// valType SubFactor16 = m[1][0] * m[2][3] - m[2][0] * m[1][3];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac3 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac4;
|
||||
{
|
||||
// valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
|
||||
// valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
|
||||
// valType SubFactor10 = m[1][0] * m[3][2] - m[3][0] * m[1][2];
|
||||
// valType SubFactor17 = m[1][0] * m[2][2] - m[2][0] * m[1][2];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac4 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac5;
|
||||
{
|
||||
// valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
|
||||
// valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
|
||||
// valType SubFactor12 = m[1][0] * m[3][1] - m[3][0] * m[1][1];
|
||||
// valType SubFactor18 = m[1][0] * m[2][1] - m[2][0] * m[1][1];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac5 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 SignA = _mm_set_ps(1.0f, -1.0f, 1.0f, -1.0f);
|
||||
__m128 SignB = _mm_set_ps(-1.0f, 1.0f, -1.0f, 1.0f);
|
||||
|
||||
// m[1][0]
|
||||
// m[0][0]
|
||||
// m[0][0]
|
||||
// m[0][0]
|
||||
__m128 Temp0 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Vec0 = _mm_shuffle_ps(Temp0, Temp0, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// m[1][1]
|
||||
// m[0][1]
|
||||
// m[0][1]
|
||||
// m[0][1]
|
||||
__m128 Temp1 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Vec1 = _mm_shuffle_ps(Temp1, Temp1, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// m[1][2]
|
||||
// m[0][2]
|
||||
// m[0][2]
|
||||
// m[0][2]
|
||||
__m128 Temp2 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Vec2 = _mm_shuffle_ps(Temp2, Temp2, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// m[1][3]
|
||||
// m[0][3]
|
||||
// m[0][3]
|
||||
// m[0][3]
|
||||
__m128 Temp3 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Vec3 = _mm_shuffle_ps(Temp3, Temp3, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// col0
|
||||
// + (Vec1[0] * Fac0[0] - Vec2[0] * Fac1[0] + Vec3[0] * Fac2[0]),
|
||||
// - (Vec1[1] * Fac0[1] - Vec2[1] * Fac1[1] + Vec3[1] * Fac2[1]),
|
||||
// + (Vec1[2] * Fac0[2] - Vec2[2] * Fac1[2] + Vec3[2] * Fac2[2]),
|
||||
// - (Vec1[3] * Fac0[3] - Vec2[3] * Fac1[3] + Vec3[3] * Fac2[3]),
|
||||
__m128 Mul00 = _mm_mul_ps(Vec1, Fac0);
|
||||
__m128 Mul01 = _mm_mul_ps(Vec2, Fac1);
|
||||
__m128 Mul02 = _mm_mul_ps(Vec3, Fac2);
|
||||
__m128 Sub00 = _mm_sub_ps(Mul00, Mul01);
|
||||
__m128 Add00 = _mm_add_ps(Sub00, Mul02);
|
||||
__m128 Inv0 = _mm_mul_ps(SignB, Add00);
|
||||
|
||||
// col1
|
||||
// - (Vec0[0] * Fac0[0] - Vec2[0] * Fac3[0] + Vec3[0] * Fac4[0]),
|
||||
// + (Vec0[0] * Fac0[1] - Vec2[1] * Fac3[1] + Vec3[1] * Fac4[1]),
|
||||
// - (Vec0[0] * Fac0[2] - Vec2[2] * Fac3[2] + Vec3[2] * Fac4[2]),
|
||||
// + (Vec0[0] * Fac0[3] - Vec2[3] * Fac3[3] + Vec3[3] * Fac4[3]),
|
||||
__m128 Mul03 = _mm_mul_ps(Vec0, Fac0);
|
||||
__m128 Mul04 = _mm_mul_ps(Vec2, Fac3);
|
||||
__m128 Mul05 = _mm_mul_ps(Vec3, Fac4);
|
||||
__m128 Sub01 = _mm_sub_ps(Mul03, Mul04);
|
||||
__m128 Add01 = _mm_add_ps(Sub01, Mul05);
|
||||
__m128 Inv1 = _mm_mul_ps(SignA, Add01);
|
||||
|
||||
// col2
|
||||
// + (Vec0[0] * Fac1[0] - Vec1[0] * Fac3[0] + Vec3[0] * Fac5[0]),
|
||||
// - (Vec0[0] * Fac1[1] - Vec1[1] * Fac3[1] + Vec3[1] * Fac5[1]),
|
||||
// + (Vec0[0] * Fac1[2] - Vec1[2] * Fac3[2] + Vec3[2] * Fac5[2]),
|
||||
// - (Vec0[0] * Fac1[3] - Vec1[3] * Fac3[3] + Vec3[3] * Fac5[3]),
|
||||
__m128 Mul06 = _mm_mul_ps(Vec0, Fac1);
|
||||
__m128 Mul07 = _mm_mul_ps(Vec1, Fac3);
|
||||
__m128 Mul08 = _mm_mul_ps(Vec3, Fac5);
|
||||
__m128 Sub02 = _mm_sub_ps(Mul06, Mul07);
|
||||
__m128 Add02 = _mm_add_ps(Sub02, Mul08);
|
||||
__m128 Inv2 = _mm_mul_ps(SignB, Add02);
|
||||
|
||||
// col3
|
||||
// - (Vec1[0] * Fac2[0] - Vec1[0] * Fac4[0] + Vec2[0] * Fac5[0]),
|
||||
// + (Vec1[0] * Fac2[1] - Vec1[1] * Fac4[1] + Vec2[1] * Fac5[1]),
|
||||
// - (Vec1[0] * Fac2[2] - Vec1[2] * Fac4[2] + Vec2[2] * Fac5[2]),
|
||||
// + (Vec1[0] * Fac2[3] - Vec1[3] * Fac4[3] + Vec2[3] * Fac5[3]));
|
||||
__m128 Mul09 = _mm_mul_ps(Vec0, Fac2);
|
||||
__m128 Mul10 = _mm_mul_ps(Vec1, Fac4);
|
||||
__m128 Mul11 = _mm_mul_ps(Vec2, Fac5);
|
||||
__m128 Sub03 = _mm_sub_ps(Mul09, Mul10);
|
||||
__m128 Add03 = _mm_add_ps(Sub03, Mul11);
|
||||
__m128 Inv3 = _mm_mul_ps(SignA, Add03);
|
||||
|
||||
__m128 Row0 = _mm_shuffle_ps(Inv0, Inv1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Row1 = _mm_shuffle_ps(Inv2, Inv3, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Row2 = _mm_shuffle_ps(Row0, Row1, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
|
||||
// valType Determinant = m[0][0] * Inverse[0][0]
|
||||
// + m[0][1] * Inverse[1][0]
|
||||
// + m[0][2] * Inverse[2][0]
|
||||
// + m[0][3] * Inverse[3][0];
|
||||
__m128 Det0 = Phanes::Core::Math::SIMD::vec4_dot(m1.c0.data, Row2);
|
||||
return _mm_cvtss_f32(Det0);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct compute_mat4_inv<float, false>
|
||||
{
|
||||
// From: GLM: https://github.com/g-truc/glm/blob/master/glm/simd/matrix.h (MIT License)
|
||||
static FORCEINLINE bool map(Phanes::Core::Math::TMatrix4<float, true>& r, const Phanes::Core::Math::TMatrix4<float, true>& m1)
|
||||
{
|
||||
__m128 Fac0;
|
||||
{
|
||||
// valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
|
||||
// valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
|
||||
// valType SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];
|
||||
// valType SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac0 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac1;
|
||||
{
|
||||
// valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
|
||||
// valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
|
||||
// valType SubFactor07 = m[1][1] * m[3][3] - m[3][1] * m[1][3];
|
||||
// valType SubFactor14 = m[1][1] * m[2][3] - m[2][1] * m[1][3];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac1 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
|
||||
__m128 Fac2;
|
||||
{
|
||||
// valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
|
||||
// valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
|
||||
// valType SubFactor08 = m[1][1] * m[3][2] - m[3][1] * m[1][2];
|
||||
// valType SubFactor15 = m[1][1] * m[2][2] - m[2][1] * m[1][2];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac2 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac3;
|
||||
{
|
||||
// valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
|
||||
// valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
|
||||
// valType SubFactor09 = m[1][0] * m[3][3] - m[3][0] * m[1][3];
|
||||
// valType SubFactor16 = m[1][0] * m[2][3] - m[2][0] * m[1][3];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac3 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac4;
|
||||
{
|
||||
// valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
|
||||
// valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
|
||||
// valType SubFactor10 = m[1][0] * m[3][2] - m[3][0] * m[1][2];
|
||||
// valType SubFactor17 = m[1][0] * m[2][2] - m[2][0] * m[1][2];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac4 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 Fac5;
|
||||
{
|
||||
// valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
|
||||
// valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
|
||||
// valType SubFactor12 = m[1][0] * m[3][1] - m[3][0] * m[1][1];
|
||||
// valType SubFactor18 = m[1][0] * m[2][1] - m[2][0] * m[1][1];
|
||||
|
||||
__m128 Swp0a = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Swp0b = _mm_shuffle_ps(m1.c3.data, m1.c2.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m128 Swp00 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
|
||||
__m128 Swp03 = _mm_shuffle_ps(m1.c2.data, m1.c1.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
__m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
|
||||
__m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
|
||||
Fac5 = _mm_sub_ps(Mul00, Mul01);
|
||||
}
|
||||
|
||||
__m128 SignA = _mm_set_ps(1.0f, -1.0f, 1.0f, -1.0f);
|
||||
__m128 SignB = _mm_set_ps(-1.0f, 1.0f, -1.0f, 1.0f);
|
||||
|
||||
// m[1][0]
|
||||
// m[0][0]
|
||||
// m[0][0]
|
||||
// m[0][0]
|
||||
__m128 Temp0 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Vec0 = _mm_shuffle_ps(Temp0, Temp0, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// m[1][1]
|
||||
// m[0][1]
|
||||
// m[0][1]
|
||||
// m[0][1]
|
||||
__m128 Temp1 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 Vec1 = _mm_shuffle_ps(Temp1, Temp1, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// m[1][2]
|
||||
// m[0][2]
|
||||
// m[0][2]
|
||||
// m[0][2]
|
||||
__m128 Temp2 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 Vec2 = _mm_shuffle_ps(Temp2, Temp2, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// m[1][3]
|
||||
// m[0][3]
|
||||
// m[0][3]
|
||||
// m[0][3]
|
||||
__m128 Temp3 = _mm_shuffle_ps(m1.c1.data, m1.c0.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 Vec3 = _mm_shuffle_ps(Temp3, Temp3, _MM_SHUFFLE(2, 2, 2, 0));
|
||||
|
||||
// col0
|
||||
// + (Vec1[0] * Fac0[0] - Vec2[0] * Fac1[0] + Vec3[0] * Fac2[0]),
|
||||
// - (Vec1[1] * Fac0[1] - Vec2[1] * Fac1[1] + Vec3[1] * Fac2[1]),
|
||||
// + (Vec1[2] * Fac0[2] - Vec2[2] * Fac1[2] + Vec3[2] * Fac2[2]),
|
||||
// - (Vec1[3] * Fac0[3] - Vec2[3] * Fac1[3] + Vec3[3] * Fac2[3]),
|
||||
__m128 Mul00 = _mm_mul_ps(Vec1, Fac0);
|
||||
__m128 Mul01 = _mm_mul_ps(Vec2, Fac1);
|
||||
__m128 Mul02 = _mm_mul_ps(Vec3, Fac2);
|
||||
__m128 Sub00 = _mm_sub_ps(Mul00, Mul01);
|
||||
__m128 Add00 = _mm_add_ps(Sub00, Mul02);
|
||||
__m128 Inv0 = _mm_mul_ps(SignB, Add00);
|
||||
|
||||
// col1
|
||||
// - (Vec0[0] * Fac0[0] - Vec2[0] * Fac3[0] + Vec3[0] * Fac4[0]),
|
||||
// + (Vec0[1] * Fac0[1] - Vec2[1] * Fac3[1] + Vec3[1] * Fac4[1]),
|
||||
// - (Vec0[2] * Fac0[2] - Vec2[2] * Fac3[2] + Vec3[2] * Fac4[2]),
|
||||
// + (Vec0[3] * Fac0[3] - Vec2[3] * Fac3[3] + Vec3[3] * Fac4[3]),
|
||||
__m128 Mul03 = _mm_mul_ps(Vec0, Fac0);
|
||||
__m128 Mul04 = _mm_mul_ps(Vec2, Fac3);
|
||||
__m128 Mul05 = _mm_mul_ps(Vec3, Fac4);
|
||||
__m128 Sub01 = _mm_sub_ps(Mul03, Mul04);
|
||||
__m128 Add01 = _mm_add_ps(Sub01, Mul05);
|
||||
__m128 Inv1 = _mm_mul_ps(SignA, Add01);
|
||||
|
||||
// col2
|
||||
// + (Vec0[0] * Fac1[0] - Vec1[0] * Fac3[0] + Vec3[0] * Fac5[0]),
|
||||
// - (Vec0[1] * Fac1[1] - Vec1[1] * Fac3[1] + Vec3[1] * Fac5[1]),
|
||||
// + (Vec0[2] * Fac1[2] - Vec1[2] * Fac3[2] + Vec3[2] * Fac5[2]),
|
||||
// - (Vec0[3] * Fac1[3] - Vec1[3] * Fac3[3] + Vec3[3] * Fac5[3]),
|
||||
__m128 Mul06 = _mm_mul_ps(Vec0, Fac1);
|
||||
__m128 Mul07 = _mm_mul_ps(Vec1, Fac3);
|
||||
__m128 Mul08 = _mm_mul_ps(Vec3, Fac5);
|
||||
__m128 Sub02 = _mm_sub_ps(Mul06, Mul07);
|
||||
__m128 Add02 = _mm_add_ps(Sub02, Mul08);
|
||||
__m128 Inv2 = _mm_mul_ps(SignB, Add02);
|
||||
|
||||
// col3
|
||||
// - (Vec0[0] * Fac2[0] - Vec1[0] * Fac4[0] + Vec2[0] * Fac5[0]),
|
||||
// + (Vec0[1] * Fac2[1] - Vec1[1] * Fac4[1] + Vec2[1] * Fac5[1]),
|
||||
// - (Vec0[2] * Fac2[2] - Vec1[2] * Fac4[2] + Vec2[2] * Fac5[2]),
|
||||
// + (Vec0[3] * Fac2[3] - Vec1[3] * Fac4[3] + Vec2[3] * Fac5[3]));
|
||||
__m128 Mul09 = _mm_mul_ps(Vec0, Fac2);
|
||||
__m128 Mul10 = _mm_mul_ps(Vec1, Fac4);
|
||||
__m128 Mul11 = _mm_mul_ps(Vec2, Fac5);
|
||||
__m128 Sub03 = _mm_sub_ps(Mul09, Mul10);
|
||||
__m128 Add03 = _mm_add_ps(Sub03, Mul11);
|
||||
__m128 Inv3 = _mm_mul_ps(SignA, Add03);
|
||||
|
||||
__m128 Row0 = _mm_shuffle_ps(Inv0, Inv1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Row1 = _mm_shuffle_ps(Inv2, Inv3, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
__m128 Row2 = _mm_shuffle_ps(Row0, Row1, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
|
||||
// valType Determinant = m[0][0] * Inverse[0][0]
|
||||
// + m[0][1] * Inverse[1][0]
|
||||
// + m[0][2] * Inverse[2][0]
|
||||
// + m[0][3] * Inverse[3][0];
|
||||
__m128 Det0 = Phanes::Core::Math::SIMD::vec4_dot(m1.c0.data, Row2);
|
||||
__m128 Rcp0 = _mm_div_ps(_mm_set1_ps(1.0f), Det0);
|
||||
//__m128 Rcp0 = _mm_rcp_ps(Det0);
|
||||
|
||||
// Inverse /= Determinant;
|
||||
r.c0.data = _mm_mul_ps(Inv0, Rcp0);
|
||||
r.c1.data = _mm_mul_ps(Inv1, Rcp0);
|
||||
r.c2.data = _mm_mul_ps(Inv2, Rcp0);
|
||||
r.c3.data = _mm_mul_ps(Inv3, Rcp0);
|
||||
}
|
||||
};
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user