Bug fixes / SIMD improvement.

This commit is contained in:
THoehne 2024-08-16 17:36:21 +02:00
parent ac70f0e062
commit 0a731ae2c4
9 changed files with 148 additions and 55 deletions

View File

@ -8,17 +8,46 @@ namespace Phanes::Core::Math::Detail
template<RealType T, bool S> template<RealType T, bool S>
struct compute_mat3_transpose {}; struct compute_mat3_transpose {};
// Primary template of the 3x3 multiplication kernel. It is intentionally empty:
// the actual `map` overloads are supplied by specializations selected through S
// (presumably the SIMD on/off flag -- confirm against TMatrix3's second parameter).
template<RealType T, bool S>
struct compute_mat3_mul {};
template<RealType T> template<RealType T>
struct compute_mat3_transpose<T, false> struct compute_mat3_transpose<T, false>
{ {
static constexpr void map(Phanes::Core::Math::TMatrix3<T, false>& r, const TMatrix3<T, false>& m1) static constexpr void map(Phanes::Core::Math::TMatrix3<T, false>& r, const TMatrix3<T, false>& m1)
{ {
r = TMatrix4<T, false>(m1(0, 0), m1(1, 0), m1(2, 0), r = TMatrix3<T, false>(m1(0, 0), m1(1, 0), m1(2, 0),
m1(0, 1), m1(1, 1), m1(2, 1), m1(0, 1), m1(1, 1), m1(2, 1),
m1(0, 2), m1(1, 2), m1(2, 2) m1(0, 2), m1(1, 2), m1(2, 2)
); );
} }
};
template<RealType T>
struct compute_mat3_mul<T, false>
{
/**
 * Scalar matrix-matrix product: r = m1 * m2.
 *
 * @param(r)  Receives the product; every element r(row, col) is overwritten.
 * @param(m1) Left operand.
 * @param(m2) Right operand.
 *
 * @note Elements are stored into r as soon as they are computed (row by row),
 *       so r must not refer to the same object as m1 or m2.
 */
static constexpr void map(Phanes::Core::Math::TMatrix3<T, false>& r, const TMatrix3<T, false>& m1, const TMatrix3<T, false>& m2)
{
    for (int row = 0; row < 3; ++row)
    {
        for (int col = 0; col < 3; ++col)
        {
            // Dot product of row `row` of m1 with column `col` of m2.
            r(row, col) = m1(row, 0) * m2(0, col) + m1(row, 1) * m2(1, col) + m1(row, 2) * m2(2, col);
        }
    }
}
/**
 * Scalar matrix-vector product: r = m1 * v.
 *
 * @param(r)  Receives the transformed vector (x, y and z are overwritten in that order).
 * @param(m1) Matrix applied to the vector.
 * @param(v)  Vector to transform.
 *
 * @note Components are written one after another while v is still being read,
 *       so r must not refer to the same object as v.
 */
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const TMatrix3<T, false>& m1, const TVector3<T, false>& v)
{
    // Dot product of one matrix row with v, evaluated at the time of the call.
    const auto rowDot = [&m1, &v](int row) -> T
    {
        return m1(row, 0) * v.x + m1(row, 1) * v.y + m1(row, 2) * v.z;
    };

    r.x = rowDot(0);
    r.y = rowDot(1);
    r.z = rowDot(2);
}
}; };
} }

View File

@ -134,7 +134,7 @@ namespace Phanes::Core::Math {
template<> template<>
FORCEINLINE float Abs<float>(float s) FORCEINLINE float Abs<float>(float s)
{ {
return fabs(s); return (float)fabs(s);
}; };
template<> template<>

View File

@ -88,9 +88,31 @@ namespace Phanes::Core::Math {
// Matrix2 // Matrix2
typedef TMatrix2<float> Matrix2; typedef TMatrix2<float> Matrix2;
typedef TMatrix2<float> Matrix2f; typedef TMatrix2<float> Matrix2f;
typedef TMatrix2<double> Matrix2d; typedef TMatrix2<double> Matrix2d;
// Matrix3
// Plain (non-SIMD) 3x3 matrices.
typedef TMatrix3<float, false> Matrix3;
typedef TMatrix3<float, false> Matrix3f;
typedef TMatrix3<double, false> Matrix3d;
// "Reg" variants: the second template argument is taken from SIMD::use_simd,
// i.e. SIMD is used only when use_simd reports support for this type/length.
typedef TMatrix3<float, SIMD::use_simd<float, 3, true>::value> Matrix3Reg;
typedef TMatrix3<float, SIMD::use_simd<float, 3, true>::value> Matrix3Regf;
typedef TMatrix3<double, SIMD::use_simd<double, 3, true>::value> Matrix3Regd;
// Same as Matrix3Regd but queried with IsAligned = false.
typedef TMatrix3<double, SIMD::use_simd<double, 3, false>::value> Matrix3Regf64;
// Matrix4
// Plain (non-SIMD) 4x4 matrices.
typedef TMatrix4<float, false> Matrix4;
typedef TMatrix4<float, false> Matrix4f;
typedef TMatrix4<double, false> Matrix4d;
// "Reg" variants of the 4x4 matrix; these must name TMatrix4 (not TMatrix3),
// matching the vector length of 4 passed to SIMD::use_simd.
typedef TMatrix4<float, SIMD::use_simd<float, 4, true>::value> Matrix4Reg;
typedef TMatrix4<float, SIMD::use_simd<float, 4, true>::value> Matrix4Regf;
typedef TMatrix4<double, SIMD::use_simd<double, 4, true>::value> Matrix4Regd;
// Same as Matrix4Regd but queried with IsAligned = false.
typedef TMatrix4<double, SIMD::use_simd<double, 4, false>::value> Matrix4Regf64;
} // Phanes::Core::Math::coretypes } // Phanes::Core::Math::coretypes

View File

@ -104,6 +104,12 @@ namespace Phanes::Core::Math {
return "([" + ToString(m(0, 0)) + ", " + ToString(m(0, 1)) + "], [" + ToString(m(1, 0)) + ", " + ToString(m(1, 1)) + "])"; return "([" + ToString(m(0, 0)) + ", " + ToString(m(0, 1)) + "], [" + ToString(m(1, 0)) + ", " + ToString(m(1, 1)) + "])";
} }
/**
 * Formats a 3x3 matrix as "([a, b, c], [d, e, f], [g, h, i])", one bracket
 * group per row, using ToString on each element m(row, col).
 *
 * @param(m) Matrix to format.
 *
 * @return Textual representation of the matrix.
 */
template<RealType T, bool S>
std::string ToString(const TMatrix3<T, S>& m)
{
    std::string text = "(";

    for (int row = 0; row < 3; ++row)
    {
        // Rows are separated by ", "; the first row has no leading separator.
        if (row != 0)
        {
            text += ", ";
        }

        text += "[";
        text += ToString(m(row, 0));
        text += ", ";
        text += ToString(m(row, 1));
        text += ", ";
        text += ToString(m(row, 2));
        text += "]";
    }

    text += ")";
    return text;
}
//std::string toString(const Matrix3& v); //std::string toString(const Matrix3& v);

View File

@ -15,6 +15,7 @@ namespace Phanes::Core::Math {
// 3x3 Matrix defined in column-major order. // 3x3 Matrix defined in column-major order.
// Accessed by M[Row][Col]. // Accessed by M[Row][Col].
template<RealType T, bool S> template<RealType T, bool S>
struct TMatrix3 struct TMatrix3
{ {
@ -39,9 +40,10 @@ namespace Phanes::Core::Math {
/// </summary> /// </summary>
TVector3<T, S> c2; TVector3<T, S> c2;
}; };
T data[3][4];
}; };
T data[3][3];
public: public:
@ -76,9 +78,9 @@ namespace Phanes::Core::Math {
* Construct Matrix from parameters. * Construct Matrix from parameters.
* *
* @param(n00) M[0][0] * @param(n00) M[0][0]
* @param(n10) M[1][0] * @param(n10) M[0][1]
* @param(n01) M[0][1] * @param(n20) M[0][2]
* @param(n11) M[1][1] * @param(n01) M[1][0]
* ... * ...
* *
* @note nXY = n[Row][Col] * @note nXY = n[Row][Col]
@ -109,6 +111,11 @@ namespace Phanes::Core::Math {
public: public:
/**
 * Read-only element access; returns a copy of the element.
 *
 * @param(n) Second index into the storage (data[m][n]); the row, going by the
 *           file's column-major layout comment.
 * @param(m) First index into the storage (data[m][n]); the column.
 *
 * @note No bounds checking is performed.
 */
FORCEINLINE T operator() (int n, int m) const
{
    const T element = this->data[m][n];
    return element;
}
FORCEINLINE T& operator() (int n, int m) FORCEINLINE T& operator() (int n, int m)
{ {
return this->data[m][n]; return this->data[m][n];
@ -119,12 +126,7 @@ namespace Phanes::Core::Math {
return (*reinterpret_cast<TVector3<T, S>*>(this->m[m])); return (*reinterpret_cast<TVector3<T, S>*>(this->m[m]));
} }
FORCEINLINE const T& operator() (int n, int m) const FORCEINLINE const TVector3<T, S> operator[] (int m) const
{
return this->data[m][n];
}
FORCEINLINE const TVector3<T, S>& operator[] (int m) const
{ {
return (*reinterpret_cast<TVector3<T, S>*>(this->m[m])); return (*reinterpret_cast<TVector3<T, S>*>(this->m[m]));
} }
@ -229,14 +231,7 @@ namespace Phanes::Core::Math {
*/ */
template<RealType T, bool S> template<RealType T, bool S>
TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2) TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2);
{
m1.c0 *= m2.c0;
m1.c1 *= m2.c1;
m1.c2 *= m2.c2;
return m1;
}
/** /**
* Multiply matrix with scalar * Multiply matrix with scalar
@ -298,9 +293,9 @@ namespace Phanes::Core::Math {
template<RealType T, bool S> template<RealType T, bool S>
TMatrix3<T, S> operator+ (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2) TMatrix3<T, S> operator+ (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{ {
return TMatrix2<T>(m1.c0 + m2.c0, return TMatrix3<T, S>(m1.c0 + m2.c0,
m1.c1 + m2.c1, m1.c1 + m2.c1,
m1.c2 + m2.c2); m1.c2 + m2.c2);
} }
/** /**
@ -348,21 +343,6 @@ namespace Phanes::Core::Math {
m.c2 * s); m.c2 * s);
} }
/**
* Divide matrix by matrix (componentwise)
*
* @param(m1) Matrix
* @param(m2) Matrix
*/
template<RealType T, bool S>
TMatrix3<T, S> operator/ (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
return TMatrix3<T, S>(m1.c0 / m2.c0,
m1.c1 / m2.c1,
m1.c2 / m2.c2);
}
/** /**
* Multiply scalar with matrix * Multiply scalar with matrix
* *
@ -387,12 +367,10 @@ namespace Phanes::Core::Math {
*/ */
template<RealType T, bool S> template<RealType T, bool S>
TMatrix3<T, S> operator* (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2) TMatrix3<T, S> operator* (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2);
{
return TMatrix3<T, S>(m1.c0 * m2.c0, template<RealType T, bool S>
m1.c1 * m2.c1, TVector3<T, S> operator* (const TMatrix3<T, S>& m1, const TVector3<T, S>& v);
m1.c2 * m2.c2);
}
/** /**
* Compare matrix with other matrix. * Compare matrix with other matrix.
@ -495,7 +473,7 @@ namespace Phanes::Core::Math {
*/ */
template<RealType T, bool S> template<RealType T, bool S>
bool Inverse(TMatrix3<T, S>& r, const TMatrix3<T, S>& m1) bool Inverse(const TMatrix3<T, S>& m1, Ref<TMatrix3<T, S>> r)
{ {
TVector3<T, S> r0 = CrossP(m1.c1, m1.c2); TVector3<T, S> r0 = CrossP(m1.c1, m1.c2);
TVector3<T, S> r1 = CrossP(m1.c2, m1.c0); TVector3<T, S> r1 = CrossP(m1.c2, m1.c0);

View File

@ -24,4 +24,28 @@ namespace Phanes::Core::Math
return r; return r;
} }
/**
 * Multiplies m1 by m2 in place (m1 = m1 * m2) using the kernel selected by S.
 *
 * @param(m1) Left operand; overwritten with the product.
 * @param(m2) Right operand.
 *
 * @return Copy of m1 after the multiplication.
 *
 * @note The product is built in a temporary first, because the kernel reads m1
 *       while it writes its output.
 */
template<RealType T, bool S>
TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
    using Kernel = Detail::compute_mat3_mul<T, S>;

    TMatrix3<T, S> product;
    Kernel::map(product, m1, m2);

    m1 = product;
    return m1;
}
/**
 * Matrix-matrix product using the kernel selected by S.
 *
 * @param(m1) Left operand.
 * @param(m2) Right operand.
 *
 * @return New matrix holding m1 * m2; neither operand is modified.
 */
template<RealType T, bool S>
TMatrix3<T, S> operator* (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
    using Kernel = Detail::compute_mat3_mul<T, S>;

    TMatrix3<T, S> product;
    Kernel::map(product, m1, m2);

    return product;
}
/**
 * Matrix-vector product using the kernel selected by S.
 *
 * @param(m1) Matrix applied to the vector.
 * @param(v)  Vector to transform.
 *
 * @return New vector holding m1 * v; neither operand is modified.
 */
template<RealType T, bool S>
TVector3<T, S> operator* (const TMatrix3<T, S>& m1, const TVector3<T, S>& v)
{
    using Kernel = Detail::compute_mat3_mul<T, S>;

    TVector3<T, S> transformed;
    Kernel::map(transformed, m1, v);

    return transformed;
}
} }

View File

@ -26,7 +26,6 @@ namespace Phanes::Core::Math::SIMD
/// </summary> /// </summary>
/// <typeparam name="T">Type of vector</typeparam> /// <typeparam name="T">Type of vector</typeparam>
/// <typeparam name="L">Length of vector</typeparam> /// <typeparam name="L">Length of vector</typeparam>
/// <typeparam name="SimdActive">Whether SIMD intrinsics exist, that support the vector type and length.</typeparam>
/// <typeparam name="IsAligned">Whether the vector is aligned for simd usage.</typeparam> /// <typeparam name="IsAligned">Whether the vector is aligned for simd usage.</typeparam>
template<typename T, size_t L, bool IsAligned> template<typename T, size_t L, bool IsAligned>
struct use_simd struct use_simd

View File

@ -770,6 +770,45 @@ namespace Phanes::Core::Math::Detail
} }
}; };
/**
 * SSE specialization of the 3x3 multiplication kernel for float matrices.
 *
 * The matrix is handled column by column (c0, c1, c2). A result column is the
 * linear combination of the columns of the left matrix, weighted by the
 * components of the corresponding right-hand column:
 *
 *     r.cj = m1.c0 * m2.cj.x + m1.c1 * m2.cj.y + m1.c2 * m2.cj.z
 *
 * which is the same arithmetic the scalar kernel performs element by element.
 *
 * NOTE(review): assumes lanes 0/1/2 of each __m128 hold x/y/z and that lane 3
 * is padding -- confirm against the SIMD storage definition of TVector3.
 */
template<>
struct compute_mat3_mul<float, true>
{
    /**
     * Matrix-matrix product: r = m1 * m2.
     *
     * @param(r)  Receives the product.
     * @param(m1) Left operand.
     * @param(m2) Right operand.
     *
     * @note All operand columns are loaded before r is written, so r may refer
     *       to the same object as m1 or m2.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TMatrix3<float, true>& r, const TMatrix3<float, true>& m1, const TMatrix3<float, true>& m2)
    {
        const __m128 a0 = m1.c0.data;
        const __m128 a1 = m1.c1.data;
        const __m128 a2 = m1.c2.data;

        const __m128 b0 = m2.c0.data;
        const __m128 b1 = m2.c1.data;
        const __m128 b2 = m2.c2.data;

        r.c0.data = combine(a0, a1, a2, b0);
        r.c1.data = combine(a0, a1, a2, b1);
        r.c2.data = combine(a0, a1, a2, b2);
    }

    /**
     * Matrix-vector product: r = m1 * v.
     *
     * @param(r)  Receives the transformed vector.
     * @param(m1) Matrix applied to the vector.
     * @param(v)  Vector to transform.
     *
     * @note The result is computed completely before it is stored, so r may
     *       refer to the same object as v.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const TMatrix3<float, true>& m1, const TVector3<float, true>& v)
    {
        r.data = combine(m1.c0.data, m1.c1.data, m1.c2.data, v.data);
    }

private:

    // Returns c0 * w.x + c1 * w.y + c2 * w.z, broadcasting each component of w
    // across all lanes before the multiply.
    static FORCEINLINE __m128 combine(__m128 c0, __m128 c1, __m128 c2, __m128 w)
    {
        const __m128 wx = _mm_shuffle_ps(w, w, _MM_SHUFFLE(0, 0, 0, 0));
        const __m128 wy = _mm_shuffle_ps(w, w, _MM_SHUFFLE(1, 1, 1, 1));
        const __m128 wz = _mm_shuffle_ps(w, w, _MM_SHUFFLE(2, 2, 2, 2));

        // Same summation order as the scalar kernel: (x-term + y-term) + z-term.
        return _mm_add_ps(_mm_add_ps(_mm_mul_ps(c0, wx), _mm_mul_ps(c1, wy)), _mm_mul_ps(c2, wz));
    }
};
// =========== // // =========== //
// Matrix4 // // Matrix4 //
// =========== // // =========== //

View File

@ -14,17 +14,13 @@ namespace Phanes::Core::Math::SIMD
template<size_t L, typename T> template<size_t L, typename T>
struct Storage<L, T, false> struct Storage<L, T, false>
{ {
typedef struct type { typedef T type[L];
T data[L];
} type;
}; };
template<typename T> template<typename T>
struct Storage<3, T, false> struct Storage<3, T, false>
{ {
typedef struct type { typedef T type[4];
T data[4];
} type;
}; };